From c52246f53b0e8e0b9b8f3f9a0beb9209a83d9d80 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 2 May 2025 12:55:13 +0200 Subject: [PATCH 001/172] refactor symbol handling in assembly to have more structure explicit Label/PUSHLABEL instructions. --- vyper/ir/compile_ir.py | 133 +++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 52 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 936e6d5d72..51ac7bbfe5 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import copy import functools import math @@ -43,20 +45,45 @@ def PUSH_N(x, n): assert x == 0 return [f"PUSH{len(o)}"] + o +def JUMP(label: Label): + return [PUSHLABEL(label), "JUMP"] + +def JUMPI(label: Label): + return [PUSHLABEL(label), "JUMPI"] + +class Label: + _next_symbol: int = 0 + + def __init__(self, label: str): + self.label = label + + def __repr__(self): + return f"LABEL {self.label}" + + def __eq__(self, other): + if not isinstance(other, Label): + return False + return self.label == other.label -_next_symbol = 0 + def __hash__(self): + return hash(self.label) +class PUSHLABEL: + + def __init__(self, label: Label): + self.label = label + + def __str__(self): + return f"PUSHLABEL {self.label.label}" def mksymbol(name=""): - global _next_symbol - _next_symbol += 1 + Label._next_symbol += 1 - return f"_sym_{name}{_next_symbol}" + return Label(f"{name}{Label._next_symbol}") def reset_symbols(): - global _next_symbol - _next_symbol = 0 + Label._next_symbol = 0 def mkdebug(pc_debugger, ast_source): @@ -66,7 +93,7 @@ def mkdebug(pc_debugger, ast_source): def is_symbol(i): - return isinstance(i, str) and i.startswith("_sym_") + return isinstance(i, Label) # basically something like a symbol which gets resolved @@ -152,7 +179,7 @@ def _assert_false(): # use a shared failure block for common case of assert(x). 
# in the future we might want to change the code # at _sym_revert0 to: INVALID - return [_revert_label, "JUMPI"] + return JUMPI(revert_label) def _add_postambles(asm_ops): @@ -160,7 +187,7 @@ def _add_postambles(asm_ops): global _revert_label - _revert_string = [_revert_label, "JUMPDEST", *PUSH(0), "DUP1", "REVERT"] + _revert_string = [_revert_label, *PUSH(0), "DUP1", "REVERT"] if _revert_label in asm_ops: # shared failure block @@ -359,9 +386,9 @@ def _height_of(witharg): o = [] o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)) end_symbol = mksymbol("join") - o.extend(["ISZERO", end_symbol, "JUMPI"]) + o.extend(["ISZERO", *JUMPI(end_symbol)]) o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) - o.extend([end_symbol, "JUMPDEST"]) + o.extend([end_symbol]) return o # If statements (3 arguments, ie. if x: y, else: z) elif code.value == "if" and len(code.args) == 3: @@ -369,11 +396,11 @@ def _height_of(witharg): o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)) mid_symbol = mksymbol("else") end_symbol = mksymbol("join") - o.extend(["ISZERO", mid_symbol, "JUMPI"]) + o.extend(["ISZERO", *JUMPI(mid_symbol)]) o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) - o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"]) + o.extend([*JUMPI(end_symbol), mid_symbol]) o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height)) - o.extend([end_symbol, "JUMPDEST"]) + o.extend([end_symbol]) return o # repeat(counter_location, start, rounds, rounds_bound, body) @@ -425,7 +452,7 @@ def _height_of(witharg): # stack: i, rounds # if (0 == rounds) { goto end_dest; } - o.extend(["DUP1", "ISZERO", exit_dest, "JUMPI"]) + o.extend(["DUP1", "ISZERO", *JUMPI(exit_dest)]) # stack: start, rounds if start.value != 0: @@ -439,7 +466,7 @@ def _height_of(witharg): withargs[i_name.value] = height + 1 # stack: 
exit_i, i - o.extend([entry_dest, "JUMPDEST"]) + o.extend([entry_dest]) o.extend( _compile_to_assembly( body, withargs, existing_labels, (exit_dest, continue_dest, height + 2), height + 2 @@ -453,12 +480,12 @@ def _height_of(witharg): # stack: exit_i, i # increment i: - o.extend([continue_dest, "JUMPDEST", "PUSH1", 1, "ADD"]) + o.extend([continue_dest, "PUSH1", 1, "ADD"]) # stack: exit_i, i+1 (new_i) # if (exit_i != new_i) { goto entry_dest } - o.extend(["DUP2", "DUP2", "XOR", entry_dest, "JUMPI"]) - o.extend([exit_dest, "JUMPDEST", "POP", "POP"]) + o.extend(["DUP2", "DUP2", "XOR", *JUMPI(entry_dest)]) + o.extend([exit_dest, "POP", "POP"]) return o @@ -467,7 +494,7 @@ def _height_of(witharg): if not break_dest: raise CompilerPanic("Invalid break") dest, continue_dest, break_height = break_dest - return [continue_dest, "JUMP"] + return [*JUMP(continue_dest)] # Break from inside a for loop elif code.value == "break": if not break_dest: @@ -477,7 +504,7 @@ def _height_of(witharg): n_local_vars = height - break_height # clean up any stack items declared in the loop body cleanup_local_vars = ["POP"] * n_local_vars - return cleanup_local_vars + [dest, "JUMP"] + return cleanup_local_vars + [*JUMP(dest)] # Break from inside one or more for loops prior to a return statement inside the loop elif code.value == "cleanup_repeat": if not break_dest: @@ -524,10 +551,10 @@ def _height_of(witharg): o = [] # COPY the code to memory for deploy - o.extend(["_sym_subcode_size", runtime_begin, "_mem_deploy_start", "CODECOPY"]) + o.extend([PUSHLABEL(Label("subcode_size")), runtime_begin, "_mem_deploy_start", "CODECOPY"]) # calculate the len of runtime code - o.extend(["_OFST", "_sym_subcode_size", immutables_len]) # stack: len + o.extend(["_OFST", Label("subcode_size"), immutables_len]) # stack: len o.extend(["_mem_deploy_start"]) # stack: len mem_ofst o.extend(["RETURN"]) @@ -560,7 +587,7 @@ def _height_of(witharg): elif code.value == "assert_unreachable": o = 
_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height) end_symbol = mksymbol("reachable") - o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"]) + o.extend([*JUMPI(end_symbol), "INVALID", end_symbol]) return o # Assert (if false, exit) elif code.value == "assert": @@ -704,7 +731,7 @@ def _height_of(witharg): o = [] for i, c in enumerate(reversed(code.args[1:])): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - o.extend(["_sym_" + code.args[0].value, "JUMP"]) + o.extend([*JUMP(Label(code.args[0].value))]) return o elif code.value == "djump": o = [] @@ -715,7 +742,7 @@ def _height_of(witharg): return o # push a literal symbol elif code.value == "symbol": - return ["_sym_" + code.args[0].value] + return [PUSHLABEL(Label(code.args[0].value))] # set a symbol as a location. elif code.value == "label": label_name = code.args[0].value @@ -751,7 +778,7 @@ def _height_of(witharg): # label params to be consumed implicitly pop_scoped_vars = [] - return ["_sym_" + label_name, "JUMPDEST"] + body_asm + pop_scoped_vars + return [Label(label_name)] + body_asm + pop_scoped_vars elif code.value == "unique_symbol": symbol = code.args[0].value @@ -819,9 +846,8 @@ def _prune_unreachable_code(assembly): # find the next jumpdest or sublist for j in range(i + 1, len(assembly)): next_is_jumpdest = ( - j < len(assembly) - 1 + j < len(assembly) and is_symbol(assembly[j]) - and assembly[j + 1] == "JUMPDEST" ) next_is_list = isinstance(assembly[j], list) if next_is_jumpdest or next_is_list: @@ -839,17 +865,17 @@ def _prune_unreachable_code(assembly): def _prune_inefficient_jumps(assembly): - # prune sequences `_sym_x JUMP _sym_x JUMPDEST` to `_sym_x JUMPDEST` + # prune sequences `PUSHLABEL x JUMP LABEL x` to `LABEL x` changed = False i = 0 while i < len(assembly) - 4: if ( - is_symbol(assembly[i]) + isinstance(assembly[i], PUSHLABEL) and assembly[i + 1] == "JUMP" - and assembly[i] == assembly[i + 2] - and assembly[i 
+ 3] == "JUMPDEST" + and is_symbol(assembly[i+2]) + and assembly[i + 2] == assembly[i].label ): - # delete _sym_x JUMP + # delete PUSHLABEL x JUMP changed = True del assembly[i : i + 2] else: @@ -859,8 +885,9 @@ def _prune_inefficient_jumps(assembly): def _optimize_inefficient_jumps(assembly): - # optimize sequences `_sym_common JUMPI _sym_x JUMP _sym_common JUMPDEST` - # to `ISZERO _sym_x JUMPI _sym_common JUMPDEST` + # optimize sequences + # `PUSHLABEL common JUMPI PUSHLABEL x JUMP LABEL common` + # to `ISZERO PUSHLABEL x JUMPI LABEL common` changed = False i = 0 while i < len(assembly) - 6: @@ -1250,26 +1277,25 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # update pc if is_symbol(item): - if is_symbol_map_indicator(assembly[i + 1]): - # Don't increment pc as the symbol itself doesn't go into code - if item in symbol_map: - raise CompilerPanic(f"duplicate jumpdest {item}") + if item in symbol_map: + raise CompilerPanic(f"duplicate {item}") + # Don't increment pc as the symbol itself doesn't go into code + symbol_map[item] = pc - symbol_map[item] = pc - else: - pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits + if isinstance(item, PUSHLABEL): + pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits elif is_mem_sym(item): # PUSH item pc += mem_ofst_size + 1 elif is_ofst(item): - assert is_symbol(assembly[i + 1]) or is_mem_sym(assembly[i + 1]) + assert is_symbol(assembly[i + 1]) or is_mem_sym(assembly[i + 1]), assembly[i + 1] assert isinstance(assembly[i + 2], int) # [_OFST, _sym_foo, bar] -> PUSH2 (foo+bar) # [_OFST, _mem_foo, bar] -> PUSHN (foo+bar) pc -= 1 elif isinstance(item, list) and isinstance(item[0], RuntimeHeader): # we are in initcode - symbol_map[item[0].label] = pc + symbol_map[Label(item[0].label)] = pc # add source map for all items in the runtime map t = adjust_pc_maps(runtime_map, pc) for key in line_number_map: @@ -1282,7 +1308,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) 
data_section_lengths.append(_length_of_data(t)) elif isinstance(item, list) and isinstance(item[0], DataHeader): - symbol_map[item[0].label] = pc + symbol_map[Label(item[0].label)] = pc pc += _length_of_data(item) else: pc += 1 @@ -1306,11 +1332,11 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc += len(bytecode_suffix) - symbol_map["_sym_code_end"] = pc + symbol_map[Label("code_end")] = pc symbol_map["_mem_deploy_start"] = runtime_code_start symbol_map["_mem_deploy_end"] = runtime_code_end if runtime_code is not None: - symbol_map["_sym_subcode_size"] = len(runtime_code) + symbol_map[Label("subcode_size")] = len(runtime_code) # TODO refactor into two functions, create symbol_map and assemble @@ -1327,11 +1353,14 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) if item in ("DEBUG",): continue # skippable opcodes - elif is_symbol(item): + elif isinstance(item, PUSHLABEL): # push a symbol to stack - if not is_symbol_map_indicator(assembly[i + 1]): - bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=SYMBOL_SIZE)) - ret.extend(bytecode) + label = item.label + bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) + ret.extend(bytecode) + + elif isinstance(item, Label): + ret.append(get_opcodes()["JUMPDEST"][0]) elif is_mem_sym(item): bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) From 09322732a2076606b1fa40d02fb825e24aa7ee71 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 5 May 2025 12:04:14 +0200 Subject: [PATCH 002/172] wip --- vyper/codegen/module.py | 5 +++-- vyper/ir/compile_ir.py | 6 +++--- vyper/venom/venom_to_assembly.py | 7 ++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/vyper/codegen/module.py b/vyper/codegen/module.py index 56a8da0f79..132a3e3b8d 100644 --- a/vyper/codegen/module.py +++ b/vyper/codegen/module.py @@ -12,6 +12,7 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import 
_is_debug_mode from vyper.exceptions import CompilerPanic +from vyper.ir.compile_ir import Label from vyper.semantics.types.module import ModuleT from vyper.utils import OrderedSet, method_id_int @@ -310,10 +311,10 @@ def _selector_section_sparse(external_functions, module_t): for i in range(n_buckets): if i in buckets: bucket_label = f"selector_bucket_{i}" - jump_targets.append(bucket_label) + jump_targets.append(Label(bucket_label)) else: # empty bucket - jump_targets.append("fallback") + jump_targets.append(Label("fallback")) jumptable_data = ["data", "selector_buckets"] jumptable_data.extend(["symbol", label] for label in jump_targets) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 51ac7bbfe5..f161bfaa4e 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -179,7 +179,7 @@ def _assert_false(): # use a shared failure block for common case of assert(x). # in the future we might want to change the code # at _sym_revert0 to: INVALID - return JUMPI(revert_label) + return JUMPI(_revert_label) def _add_postambles(asm_ops): @@ -707,7 +707,7 @@ def _height_of(witharg): ) elif code.value == "data": - data_node = [DataHeader("_sym_" + code.args[0].value)] + data_node = [DataHeader(Label("_sym_" + code.args[0].value))] for c in code.args[1:]: if isinstance(c.value, int): @@ -1160,7 +1160,7 @@ def __repr__(self): @dataclass class DataHeader: - label: str + label: Label def __repr__(self): return f"DATA {self.label}" diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 4c5a2bfcda..47966c26a8 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -5,6 +5,7 @@ PUSH, DataHeader, Instruction, + Label, RuntimeHeader, mksymbol, optimize_assembly, @@ -167,7 +168,7 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) # TODO make this property on IRFunction - asm.extend(["_sym__ctor_exit", "JUMPDEST"]) + 
asm.extend([Label("_sym__ctor_exit"), "JUMPDEST"]) if ctx.immutables_len is not None and ctx.ctor_mem_size is not None: asm.extend( ["_sym_subcode_size", "_sym_runtime_begin", "_mem_deploy_start", "CODECOPY"] @@ -192,7 +193,7 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: for item in data_section.data_items: data = item.data if isinstance(data, IRLabel): - asm_data_section.append(_as_asm_symbol(data)) + asm_data_section.append(Label(_as_asm_symbol(data))) else: assert isinstance(data, bytes) asm_data_section.append(data) @@ -337,7 +338,7 @@ def _generate_evm_for_basicblock_r( asm = [] # assembly entry point into the block - asm.append(_as_asm_symbol(basicblock.label)) + asm.append(Label(_as_asm_symbol(basicblock.label))) asm.append("JUMPDEST") fn = basicblock.parent From a07684cc7f7d62ecda0cbd0cfa1c62159bad3248 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 5 May 2025 12:15:38 +0200 Subject: [PATCH 003/172] wip --- vyper/codegen/module.py | 4 ++-- vyper/ir/compile_ir.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/codegen/module.py b/vyper/codegen/module.py index 132a3e3b8d..63c6a8f488 100644 --- a/vyper/codegen/module.py +++ b/vyper/codegen/module.py @@ -311,10 +311,10 @@ def _selector_section_sparse(external_functions, module_t): for i in range(n_buckets): if i in buckets: bucket_label = f"selector_bucket_{i}" - jump_targets.append(Label(bucket_label)) + jump_targets.append(bucket_label) else: # empty bucket - jump_targets.append(Label("fallback")) + jump_targets.append("fallback") jumptable_data = ["data", "selector_buckets"] jumptable_data.extend(["symbol", label] for label in jump_targets) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index f161bfaa4e..3cf8a89650 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -737,7 +737,7 @@ def _height_of(witharg): o = [] # "djump" compiles to a raw EVM jump instruction jump_target = code.args[0] - 
o.extend(_compile_to_assembly(jump_target, withargs, existing_labels, break_dest, height)) + o.extend(_compile_to_assembly(Label(jump_target), withargs, existing_labels, break_dest, height)) o.append("JUMP") return o # push a literal symbol From e1db4210f1ff550fdf6de5aa9c2add38d2e5c0c0 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 5 May 2025 12:36:39 +0200 Subject: [PATCH 004/172] charles --- vyper/ir/compile_ir.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 3cf8a89650..de139c02db 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -258,8 +258,8 @@ def compile_to_assembly(code, optimize=OptimizationLevel.GAS): _relocate_segments(res) - if optimize != OptimizationLevel.NONE: - optimize_assembly(res) + # if optimize != OptimizationLevel.NONE: + # optimize_assembly(res) return res @@ -272,7 +272,7 @@ def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=N raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}") def _data_ofst_of(sym, ofst, height_): - # e.g. _OFST _sym_foo 32 + # e.g. 
_OFST Label foo 32 assert is_symbol(sym) or is_mem_sym(sym) if isinstance(ofst.value, int): # resolve at compile time using magic _OFST op @@ -336,7 +336,7 @@ def _height_of(witharg): o = [] # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE o.extend(PUSH(32)) - o.extend(_data_ofst_of("_sym_code_end", loc, height + 1)) + o.extend(_data_ofst_of("code_end", loc, height + 1)) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"]) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"]) return o @@ -350,7 +350,7 @@ def _height_of(witharg): o = [] o.extend(_compile_to_assembly(len_, withargs, existing_labels, break_dest, height)) - o.extend(_data_ofst_of("_sym_code_end", src, height + 1)) + o.extend(_data_ofst_of("code_end", src, height + 1)) o.extend(_compile_to_assembly(dst, withargs, existing_labels, break_dest, height + 2)) o.extend(["CODECOPY"]) return o @@ -707,7 +707,7 @@ def _height_of(witharg): ) elif code.value == "data": - data_node = [DataHeader(Label("_sym_" + code.args[0].value))] + data_node = [DataHeader(Label(code.args[0].value))] for c in code.args[1:]: if isinstance(c.value, int): @@ -998,7 +998,7 @@ def _merge_iszero(assembly): # this helper function tells us if we want to add the previous instruction # to the symbol map. 
def is_symbol_map_indicator(asm_node): - return asm_node == "JUMPDEST" + return isinstance(asm_node, Label) def _prune_unused_jumpdests(assembly): From 00db4df5e4903e6f7d43cede5fd73c93f7703d3e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 12:43:12 +0200 Subject: [PATCH 005/172] fix for postambles --- vyper/ir/compile_ir.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index de139c02db..fc1d9877b3 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -45,17 +45,20 @@ def PUSH_N(x, n): assert x == 0 return [f"PUSH{len(o)}"] + o + def JUMP(label: Label): return [PUSHLABEL(label), "JUMP"] + def JUMPI(label: Label): return [PUSHLABEL(label), "JUMPI"] + class Label: _next_symbol: int = 0 def __init__(self, label: str): - self.label = label + self.label = label def __repr__(self): return f"LABEL {self.label}" @@ -68,14 +71,23 @@ def __eq__(self, other): def __hash__(self): return hash(self.label) -class PUSHLABEL: +class PUSHLABEL: def __init__(self, label: Label): self.label = label def __str__(self): return f"PUSHLABEL {self.label.label}" + def __eq__(self, other): + if not isinstance(other, PUSHLABEL): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + + def mksymbol(name=""): Label._next_symbol += 1 @@ -189,7 +201,7 @@ def _add_postambles(asm_ops): _revert_string = [_revert_label, *PUSH(0), "DUP1", "REVERT"] - if _revert_label in asm_ops: + if PUSHLABEL(_revert_label) in asm_ops: # shared failure block to_append.extend(_revert_string) From 5e83ccc8da19a30682c3ac822115925b9a451ad8 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 12:55:13 +0200 Subject: [PATCH 006/172] more fixes --- vyper/ir/compile_ir.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index fc1d9877b3..5ad74bab1a 
100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -348,7 +348,7 @@ def _height_of(witharg): o = [] # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE o.extend(PUSH(32)) - o.extend(_data_ofst_of("code_end", loc, height + 1)) + o.extend(_data_ofst_of(Label("code_end"), loc, height + 1)) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"]) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"]) return o @@ -362,7 +362,7 @@ def _height_of(witharg): o = [] o.extend(_compile_to_assembly(len_, withargs, existing_labels, break_dest, height)) - o.extend(_data_ofst_of("code_end", src, height + 1)) + o.extend(_data_ofst_of(Label("code_end"), src, height + 1)) o.extend(_compile_to_assembly(dst, withargs, existing_labels, break_dest, height + 2)) o.extend(["CODECOPY"]) return o @@ -729,9 +729,9 @@ def _height_of(witharg): data_node.append(c.value) elif isinstance(c, IRnode): assert c.value == "symbol" - data_node.extend( - _compile_to_assembly(c, withargs, existing_labels, break_dest, height) - ) + assert len(c.args) == 1 + assert isinstance(c.args[0].value, str), (type(c.args[0].value), c) + data_node.append(Label(c.args[0].value)) else: raise ValueError(f"Invalid data: {type(c)} {c}") @@ -749,7 +749,9 @@ def _height_of(witharg): o = [] # "djump" compiles to a raw EVM jump instruction jump_target = code.args[0] - o.extend(_compile_to_assembly(Label(jump_target), withargs, existing_labels, break_dest, height)) + o.extend( + _compile_to_assembly(jump_target, withargs, existing_labels, break_dest, height) + ) o.append("JUMP") return o # push a literal symbol @@ -857,10 +859,7 @@ def _prune_unreachable_code(assembly): if assembly[i] in _TERMINAL_OPS: # find the next jumpdest or sublist for j in range(i + 1, len(assembly)): - next_is_jumpdest = ( - j < len(assembly) - and is_symbol(assembly[j]) - ) + next_is_jumpdest = j < len(assembly) and is_symbol(assembly[j]) next_is_list = isinstance(assembly[j], list) if next_is_jumpdest or 
next_is_list: break @@ -884,7 +883,7 @@ def _prune_inefficient_jumps(assembly): if ( isinstance(assembly[i], PUSHLABEL) and assembly[i + 1] == "JUMP" - and is_symbol(assembly[i+2]) + and is_symbol(assembly[i + 2]) and assembly[i + 2] == assembly[i].label ): # delete PUSHLABEL x JUMP @@ -1175,7 +1174,7 @@ class DataHeader: label: Label def __repr__(self): - return f"DATA {self.label}" + return f"DATA {self.label.label}" def _relocate_segments(assembly): @@ -1320,7 +1319,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) data_section_lengths.append(_length_of_data(t)) elif isinstance(item, list) and isinstance(item[0], DataHeader): - symbol_map[Label(item[0].label)] = pc + symbol_map[item[0].label] = pc pc += _length_of_data(item) else: pc += 1 From 1f68273004d2a164523ebfa03702820700e4b821 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 12:55:19 +0200 Subject: [PATCH 007/172] sanity help --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 5ad74bab1a..8d6cbb54cc 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -285,7 +285,7 @@ def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=N def _data_ofst_of(sym, ofst, height_): # e.g. 
_OFST Label foo 32 - assert is_symbol(sym) or is_mem_sym(sym) + assert is_symbol(sym) or is_mem_sym(sym), sym if isinstance(ofst.value, int): # resolve at compile time using magic _OFST op return ["_OFST", sym, ofst.value] From 82b87b1f08a0e27e2001a92e13c5a5fdf140fff3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 13:00:54 +0200 Subject: [PATCH 008/172] small fixes --- vyper/ir/compile_ir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 8d6cbb54cc..680297b1e3 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1273,8 +1273,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # update pc_jump_map if item == "JUMP": last = assembly[i - 1] - if is_symbol(last) and last.startswith("_sym_internal"): - if last.endswith("cleanup"): + if is_symbol(last) and last.label.startswith("internal"): + if last.label.endswith("cleanup"): # exit an internal function line_number_map["pc_jump_map"][pc] = "o" else: From b85f758910d0787be10207495724598a6b9c7974 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 13:14:42 +0200 Subject: [PATCH 009/172] introduce a performance regression --- vyper/ir/compile_ir.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 680297b1e3..eeefc4bc51 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -286,6 +286,15 @@ def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=N def _data_ofst_of(sym, ofst, height_): # e.g. 
_OFST Label foo 32 assert is_symbol(sym) or is_mem_sym(sym), sym + + # simple way -- reintroduce compile-time resolution later + ofst = _compile_to_assembly(ofst, withargs, existing_labels, break_dest, height_) + if is_symbol(sym): + return ofst + [PUSHLABEL(sym), "ADD"] + else: + # magic for mem syms + return ofst + [sym, "ADD"] + if isinstance(ofst.value, int): # resolve at compile time using magic _OFST op return ["_OFST", sym, ofst.value] @@ -566,7 +575,7 @@ def _height_of(witharg): o.extend([PUSHLABEL(Label("subcode_size")), runtime_begin, "_mem_deploy_start", "CODECOPY"]) # calculate the len of runtime code - o.extend(["_OFST", Label("subcode_size"), immutables_len]) # stack: len + o.extend(_data_ofst_of(Label("subcode_size"), IRnode(immutables_len), height)) o.extend(["_mem_deploy_start"]) # stack: len mem_ofst o.extend(["RETURN"]) From 75838b7ffd1356d6e64174fbf14ae6329ad4ee1b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 13:25:45 +0200 Subject: [PATCH 010/172] fix PUSHLABEL for runtime_begin --- vyper/ir/compile_ir.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index eeefc4bc51..bc45ca48ee 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -572,7 +572,7 @@ def _height_of(witharg): o = [] # COPY the code to memory for deploy - o.extend([PUSHLABEL(Label("subcode_size")), runtime_begin, "_mem_deploy_start", "CODECOPY"]) + o.extend([PUSHLABEL(Label("subcode_size")), PUSHLABEL(runtime_begin), "_mem_deploy_start", "CODECOPY"]) # calculate the len of runtime code o.extend(_data_ofst_of(Label("subcode_size"), IRnode(immutables_len), height)) @@ -1170,7 +1170,7 @@ def _length_of_data(assembly): @dataclass class RuntimeHeader: - label: str + label: Label ctor_mem_size: int immutables_len: int @@ -1315,7 +1315,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc -= 1 elif isinstance(item, list) and isinstance(item[0], 
RuntimeHeader): # we are in initcode - symbol_map[Label(item[0].label)] = pc + symbol_map[item[0].label] = pc # add source map for all items in the runtime map t = adjust_pc_maps(runtime_map, pc) for key in line_number_map: From beeeca781481f75411d59ac0fe27f3cb46cfcd29 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 13:31:06 +0200 Subject: [PATCH 011/172] fix bad instruction --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index bc45ca48ee..aaaaca1092 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -419,7 +419,7 @@ def _height_of(witharg): end_symbol = mksymbol("join") o.extend(["ISZERO", *JUMPI(mid_symbol)]) o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) - o.extend([*JUMPI(end_symbol), mid_symbol]) + o.extend([*JUMP(end_symbol), mid_symbol]) o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height)) o.extend([end_symbol]) return o From f65a94e5dcc75fbaef48c41946db960b20ef862a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 5 May 2025 16:45:17 +0200 Subject: [PATCH 012/172] wip --- vyper/ir/compile_ir.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index aaaaca1092..0e85ec47ff 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -12,7 +12,7 @@ from vyper.evm.opcodes import get_opcodes, version_check from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.ir.optimizer import COMMUTATIVE_OPS -from vyper.utils import MemoryPositions +from vyper.utils import MemoryPositions, OrderedSet from vyper.version import version_tuple PUSH_OFFSET = 0x5F @@ -270,8 +270,8 @@ def compile_to_assembly(code, optimize=OptimizationLevel.GAS): _relocate_segments(res) - # if optimize != OptimizationLevel.NONE: - # optimize_assembly(res) + if optimize != 
OptimizationLevel.NONE: + optimize_assembly(res) return res @@ -1013,22 +1013,14 @@ def _merge_iszero(assembly): return changed -# a symbol _sym_x in assembly can either mean to push _sym_x to the stack, -# or it can precede a location in code which we want to add to symbol map. -# this helper function tells us if we want to add the previous instruction -# to the symbol map. -def is_symbol_map_indicator(asm_node): - return isinstance(asm_node, Label) - - def _prune_unused_jumpdests(assembly): changed = False - used_jumpdests = set() + used_jumpdests = OrderedSet() # find all used jumpdests for i in range(len(assembly) - 1): - if is_symbol(assembly[i]) and not is_symbol_map_indicator(assembly[i + 1]): + if is_symbol(assembly[i]) and assembly[i + 1] != "JUMPDEST": used_jumpdests.add(assembly[i]) for item in assembly: @@ -1082,7 +1074,7 @@ def _stack_peephole_opts(assembly): ): changed = True del assembly[i : i + 2] - if assembly[i] == "SWAP1" and assembly[i + 1].lower() in COMMUTATIVE_OPS: + if assembly[i] == "SWAP1" and str(assembly[i + 1]).lower() in COMMUTATIVE_OPS: changed = True del assembly[i] if assembly[i] == "DUP1" and assembly[i + 1] == "SWAP1": From 304d5aeca8998f20b068b83f0999e78bb85a1c6c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 17:14:41 +0200 Subject: [PATCH 013/172] update optimizations --- vyper/ir/compile_ir.py | 52 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 0e85ec47ff..08fc962346 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -357,6 +357,7 @@ def _height_of(witharg): o = [] # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE o.extend(PUSH(32)) + o.extend(_data_ofst_of(Label("code_end"), loc, height + 1)) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"]) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"]) @@ -888,7 +889,7 @@ def 
_prune_inefficient_jumps(assembly): # prune sequences `PUSHLABEL x JUMP LABEL x` to `LABEL x` changed = False i = 0 - while i < len(assembly) - 4: + while i < len(assembly) - 2: if ( isinstance(assembly[i], PUSHLABEL) and assembly[i + 1] == "JUMP" @@ -910,14 +911,14 @@ def _optimize_inefficient_jumps(assembly): # to `ISZERO PUSHLABEL x JUMPI LABEL common` changed = False i = 0 - while i < len(assembly) - 6: + while i < len(assembly) - 4: if ( - is_symbol(assembly[i]) + isinstance(assembly[i], PUSHLABEL) and assembly[i + 1] == "JUMPI" - and is_symbol(assembly[i + 2]) + and isinstance(assembly[i + 2], PUSHLABEL) and assembly[i + 3] == "JUMP" - and assembly[i] == assembly[i + 4] - and assembly[i + 5] == "JUMPDEST" + and isinstance(assembly[i + 4], Label) + and assembly[i].label == assembly[i + 4] ): changed = True assembly[i] = "ISZERO" @@ -939,26 +940,25 @@ def _merge_jumpdests(assembly): changed = False i = 0 while i < len(assembly) - 3: - if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": + #if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": + if is_symbol(assembly[i]): current_symbol = assembly[i] - if is_symbol(assembly[i + 2]) and assembly[i + 3] == "JUMPDEST": - # _sym_x JUMPDEST _sym_y JUMPDEST - # replace all instances of _sym_x with _sym_y - # (except for _sym_x JUMPDEST - don't want duplicate labels) + if is_symbol(assembly[i + 2]): + # LABEL x LABEL y + # replace all instances of PUSHLABEL x with PUSHLABEL y new_symbol = assembly[i + 2] if new_symbol != current_symbol: for j in range(len(assembly)): - if assembly[j] == current_symbol and i != j: - assembly[j] = new_symbol + if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: + assembly[j].label = new_symbol changed = True - elif is_symbol(assembly[i + 2]) and assembly[i + 3] == "JUMP": - # _sym_x JUMPDEST _sym_y JUMP - # replace all instances of _sym_x with _sym_y - # (except for _sym_x JUMPDEST - don't want duplicate labels) + elif isinstance(assembly[i + 2], 
PUSHLABEL) and assembly[i + 3] == "JUMP": + # LABEL x PUSHLABEL y JUMP + # replace all instances of PUSHLABEL x with PUSHLABEL y new_symbol = assembly[i + 2] for j in range(len(assembly)): - if assembly[j] == current_symbol and i != j: - assembly[j] = new_symbol + if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: + assembly[j].label = new_symbol changed = True i += 1 @@ -1002,7 +1002,7 @@ def _merge_iszero(assembly): # but it could also just be a no-op before JUMPI. if ( assembly[i : i + 2] == ["ISZERO", "ISZERO"] - and is_symbol(assembly[i + 2]) + and isinstance(assembly[i + 2], PUSHLABEL) and assembly[i + 3] == "JUMPI" ): changed = True @@ -1019,9 +1019,9 @@ def _prune_unused_jumpdests(assembly): used_jumpdests = OrderedSet() # find all used jumpdests - for i in range(len(assembly) - 1): - if is_symbol(assembly[i]) and assembly[i + 1] != "JUMPDEST": - used_jumpdests.add(assembly[i]) + for i in range(len(assembly)): + if isinstance(assembly[i], PUSHLABEL): + used_jumpdests.add(assembly[i].label) for item in assembly: if isinstance(item, list) and isinstance(item[0], DataHeader): @@ -1033,10 +1033,10 @@ def _prune_unused_jumpdests(assembly): # delete jumpdests that aren't used i = 0 - while i < len(assembly) - 2: + while i < len(assembly): if is_symbol(assembly[i]) and assembly[i] not in used_jumpdests: changed = True - del assembly[i : i + 2] + del assembly[i] else: i += 1 @@ -1274,7 +1274,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # update pc_jump_map if item == "JUMP": last = assembly[i - 1] - if is_symbol(last) and last.label.startswith("internal"): + if isinstance(last, PUSHLABEL) and last.label.startswith("internal"): if last.label.endswith("cleanup"): # exit an internal function line_number_map["pc_jump_map"][pc] = "o" From 1f9531931cda4613f7fe30b864fa5f152dd66c19 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 17:30:07 +0200 Subject: [PATCH 014/172] fix 
venom_to_assembly --- vyper/ir/compile_ir.py | 6 ++-- vyper/venom/venom_to_assembly.py | 54 ++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 08fc962346..5fe1e8c938 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -58,6 +58,7 @@ class Label: _next_symbol: int = 0 def __init__(self, label: str): + assert isinstance(label, str) self.label = label def __repr__(self): @@ -74,9 +75,10 @@ def __hash__(self): class PUSHLABEL: def __init__(self, label: Label): + assert isinstance(label, Label) self.label = label - def __str__(self): + def __repr__(self): return f"PUSHLABEL {self.label.label}" def __eq__(self, other): @@ -955,7 +957,7 @@ def _merge_jumpdests(assembly): elif isinstance(assembly[i + 2], PUSHLABEL) and assembly[i + 3] == "JUMP": # LABEL x PUSHLABEL y JUMP # replace all instances of PUSHLABEL x with PUSHLABEL y - new_symbol = assembly[i + 2] + new_symbol = assembly[i + 2].label for j in range(len(assembly)): if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: assembly[j].label = new_symbol diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 47966c26a8..4635fe60f8 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -3,6 +3,8 @@ from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.ir.compile_ir import ( PUSH, + is_mem_sym, + PUSHLABEL, DataHeader, Instruction, Label, @@ -108,7 +110,7 @@ ] ) -_REVERT_POSTAMBLE = ["_sym___revert", "JUMPDEST", *PUSH(0), "DUP1", "REVERT"] +_REVERT_POSTAMBLE = [Label("revert"), "JUMPDEST", *PUSH(0), "DUP1", "REVERT"] def apply_line_numbers(inst: IRInstruction, asm) -> list[str]: @@ -121,9 +123,16 @@ def apply_line_numbers(inst: IRInstruction, asm) -> list[str]: return ret # type: ignore -def _as_asm_symbol(label: IRLabel) -> str: +def _as_asm_symbol(label: IRLabel) -> Label: # Lower an IRLabel to an 
assembly symbol - return f"_sym_{label.value}" + return Label(label.value) + +def _ofst(label: str | Label, value: int) -> list[Any]: + if isinstance(label, str) and is_mem_sym(label): + pushlabel = label # _mem_foo is still magic + else: + pushlabel = PUSHLABEL(label) + return [*PUSH(value), label, "ADD"] # TODO: "assembly" gets into the recursion due to how the original @@ -168,17 +177,17 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) # TODO make this property on IRFunction - asm.extend([Label("_sym__ctor_exit"), "JUMPDEST"]) + asm.extend([Label("ctor_exit")]) if ctx.immutables_len is not None and ctx.ctor_mem_size is not None: asm.extend( - ["_sym_subcode_size", "_sym_runtime_begin", "_mem_deploy_start", "CODECOPY"] + [PUSHLABEL(Label("subcode_size")), PUSHLABEL(Label("runtime_begin")), "_mem_deploy_start", "CODECOPY"] ) - asm.extend(["_OFST", "_sym_subcode_size", ctx.immutables_len]) # stack: len + asm.extend(_ofst(Label("subcode_size"), ctx.immutables_len)) # stack: len asm.extend(["_mem_deploy_start"]) # stack: len mem_ofst asm.extend(["RETURN"]) asm.extend(_REVERT_POSTAMBLE) runtime_asm = [ - RuntimeHeader("_sym_runtime_begin", ctx.ctor_mem_size, ctx.immutables_len) + RuntimeHeader(Label("runtime_begin"), ctx.ctor_mem_size, ctx.immutables_len) ] asm.append(runtime_asm) asm = runtime_asm @@ -193,7 +202,7 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: for item in data_section.data_items: data = item.data if isinstance(data, IRLabel): - asm_data_section.append(Label(_as_asm_symbol(data))) + asm_data_section.append(_as_asm_symbol(data)) else: assert isinstance(data, bytes) asm_data_section.append(data) @@ -263,7 +272,7 @@ def _emit_input_operands( # invoke emits the actual instruction itself so we don't need # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": - assembly.append(_as_asm_symbol(op)) + 
assembly.append(PUSHLABEL(_as_asm_symbol(op))) stack.push(op) continue @@ -338,8 +347,7 @@ def _generate_evm_for_basicblock_r( asm = [] # assembly entry point into the block - asm.append(Label(_as_asm_symbol(basicblock.label))) - asm.append("JUMPDEST") + asm.append(_as_asm_symbol(basicblock.label)) fn = basicblock.parent if basicblock == fn.entry: @@ -451,7 +459,7 @@ def _generate_evm_for_instruction( if opcode == "offset": ofst, label = inst.operands assert isinstance(label, IRLabel) # help mypy - assembly.extend(["_OFST", _as_asm_symbol(label), ofst.value]) + assembly.extend(_ofst(_as_asm_symbol(label), ofst.value)) assert isinstance(inst.output, IROperand), "Offset must have output" stack.push(inst.output) return apply_line_numbers(inst, assembly) @@ -517,19 +525,19 @@ def _generate_evm_for_instruction( elif opcode == "jnz": # jump if not zero if_nonzero_label, if_zero_label = inst.get_label_operands() - assembly.append(_as_asm_symbol(if_nonzero_label)) + assembly.append(PUSHLABEL(_as_asm_symbol(if_nonzero_label))) assembly.append("JUMPI") # make sure the if_zero_label will be optimized out # assert if_zero_label == next(iter(inst.parent.cfg_out)).label - assembly.append(_as_asm_symbol(if_zero_label)) + assembly.append(PUSHLABEL(_as_asm_symbol(if_zero_label))) assembly.append("JUMP") elif opcode == "jmp": (target,) = inst.operands assert isinstance(target, IRLabel) - assembly.append(_as_asm_symbol(target)) + assembly.append(PUSHLABEL(_as_asm_symbol(target))) assembly.append("JUMP") elif opcode == "djmp": assert isinstance( @@ -541,13 +549,13 @@ def _generate_evm_for_instruction( assert isinstance( target, IRLabel ), f"invoke target must be a label (is ${type(target)} ${target})" + return_label = Label(f"label_ret_{self.label_counter}") assembly.extend( [ - f"_sym_label_ret_{self.label_counter}", - _as_asm_symbol(target), + PUSHLABEL(return_label), + PUSHLABEL(_as_asm_symbol(target)), "JUMP", - f"_sym_label_ret_{self.label_counter}", - "JUMPDEST", + return_label, 
] ) self.label_counter += 1 @@ -556,7 +564,7 @@ def _generate_evm_for_instruction( elif opcode == "return": assembly.append("RETURN") elif opcode == "exit": - assembly.extend(["_sym__ctor_exit", "JUMP"]) + assembly.extend([PUSHLABEL(Label("ctor_exit")), "JUMP"]) elif opcode == "phi": pass elif opcode == "sha3": @@ -574,21 +582,21 @@ def _generate_evm_for_instruction( ] ) elif opcode == "assert": - assembly.extend(["ISZERO", "_sym___revert", "JUMPI"]) + assembly.extend(["ISZERO", PUSHLABEL(Label("revert")), "JUMPI"]) elif opcode == "assert_unreachable": end_symbol = mksymbol("reachable") assembly.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"]) elif opcode == "iload": addr = inst.operands[0] if isinstance(addr, IRLiteral): - assembly.extend(["_OFST", "_mem_deploy_end", addr.value]) + assembly.extend(_ofst("_mem_deploy_end", addr.value)) else: assembly.extend(["_mem_deploy_end", "ADD"]) assembly.append("MLOAD") elif opcode == "istore": addr = inst.operands[1] if isinstance(addr, IRLiteral): - assembly.extend(["_OFST", "_mem_deploy_end", addr.value]) + assembly.extend(_ofst("_mem_deploy_end", addr.value)) else: assembly.extend(["_mem_deploy_end", "ADD"]) assembly.append("MSTORE") From e50003dedf8783a73fdaaa4e2736cef9bd52b669 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 17:42:23 +0200 Subject: [PATCH 015/172] fix off-by-ones --- vyper/ir/compile_ir.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 5fe1e8c938..b27df7f3f0 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -941,23 +941,23 @@ def _merge_jumpdests(assembly): # or some nested if statements.) 
changed = False i = 0 - while i < len(assembly) - 3: + while i < len(assembly) - 2: #if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": if is_symbol(assembly[i]): current_symbol = assembly[i] - if is_symbol(assembly[i + 2]): + if is_symbol(assembly[i + 1]): # LABEL x LABEL y # replace all instances of PUSHLABEL x with PUSHLABEL y - new_symbol = assembly[i + 2] + new_symbol = assembly[i + 1] if new_symbol != current_symbol: for j in range(len(assembly)): if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: assembly[j].label = new_symbol changed = True - elif isinstance(assembly[i + 2], PUSHLABEL) and assembly[i + 3] == "JUMP": + elif isinstance(assembly[i + 1], PUSHLABEL) and assembly[i + 2] == "JUMP": # LABEL x PUSHLABEL y JUMP # replace all instances of PUSHLABEL x with PUSHLABEL y - new_symbol = assembly[i + 2].label + new_symbol = assembly[i + 1].label for j in range(len(assembly)): if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: assembly[j].label = new_symbol From 136362e1326f2844de47e02c6652c7a93625c355 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 17:44:58 +0200 Subject: [PATCH 016/172] fix type error --- vyper/ir/compile_ir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index b27df7f3f0..942eda8308 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1276,8 +1276,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # update pc_jump_map if item == "JUMP": last = assembly[i - 1] - if isinstance(last, PUSHLABEL) and last.label.startswith("internal"): - if last.label.endswith("cleanup"): + if isinstance(last, PUSHLABEL) and last.label.label.startswith("internal"): + if last.label.label.endswith("cleanup"): # exit an internal function line_number_map["pc_jump_map"][pc] = "o" else: From 338ca395cc3bde243f5683860f253ddfa1a3bc5d Mon Sep 17 00:00:00 2001 
From: Charles Cooper Date: Mon, 5 May 2025 18:23:26 +0200 Subject: [PATCH 017/172] update test --- tests/functional/codegen/test_selector_table_stability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/codegen/test_selector_table_stability.py b/tests/functional/codegen/test_selector_table_stability.py index 4416b5f5ea..82955ab5e1 100644 --- a/tests/functional/codegen/test_selector_table_stability.py +++ b/tests/functional/codegen/test_selector_table_stability.py @@ -14,7 +14,7 @@ def test_dense_jumptable_stability(): # test that the selector table data is stable across different runs # (xdist should provide different PYTHONHASHSEEDs). - expected_asm = """{ DATA _sym_BUCKET_HEADERS b\'\\x0bB\' _sym_bucket_0 b\'\\n\' b\'+\\x8d\' _sym_bucket_1 b\'\\x0c\' b\'\\x00\\x85\' _sym_bucket_2 b\'\\x08\' } { DATA _sym_bucket_1 b\'\\xd8\\xee\\xa1\\xe8\' _sym_external 6 foo6()3639517672 b\'\\x05\' b\'\\xd2\\x9e\\xe0\\xf9\' _sym_external 0 foo0()3533627641 b\'\\x05\' b\'\\x05\\xf1\\xe0_\' _sym_external 2 foo2()99737695 b\'\\x05\' b\'\\x91\\t\\xb4{\' _sym_external 23 foo23()2433332347 b\'\\x05\' b\'np3\\x7f\' _sym_external 11 foo11()1852846975 b\'\\x05\' b\'&\\xf5\\x96\\xf9\' _sym_external 13 foo13()653629177 b\'\\x05\' b\'\\x04ga\\xeb\' _sym_external 14 foo14()73884139 b\'\\x05\' b\'\\x89\\x06\\xad\\xc6\' _sym_external 17 foo17()2298916294 b\'\\x05\' b\'\\xe4%\\xac\\xd1\' _sym_external 4 foo4()3827674321 b\'\\x05\' b\'yj\\x01\\xac\' _sym_external 7 foo7()2036990380 b\'\\x05\' b\'\\xf1\\xe6K\\xe5\' _sym_external 29 foo29()4058401765 b\'\\x05\' b\'\\xd2\\x89X\\xb8\' _sym_external 3 foo3()3532216504 b\'\\x05\' } { DATA _sym_bucket_2 b\'\\x06p\\xffj\' _sym_external 25 foo25()108068714 b\'\\x05\' b\'\\x964\\x99I\' _sym_external 24 foo24()2520029513 b\'\\x05\' b\'s\\x81\\xe7\\xc1\' _sym_external 10 foo10()1937893313 b\'\\x05\' b\'\\x85\\xad\\xc11\' _sym_external 28 foo28()2242756913 b\'\\x05\' b\'\\xfa"\\xb1\\xed\' _sym_external 5 foo5()4196577773 
b\'\\x05\' b\'A\\xe7[\\x05\' _sym_external 22 foo22()1105681157 b\'\\x05\' b\'\\xd3\\x89U\\xe8\' _sym_external 1 foo1()3548993000 b\'\\x05\' b\'hL\\xf8\\xf3\' _sym_external 20 foo20()1749874931 b\'\\x05\' } { DATA _sym_bucket_0 b\'\\xee\\xd9\\x1d\\xe3\' _sym_external 9 foo9()4007206371 b\'\\x05\' b\'a\\xbc\\x1ch\' _sym_external 16 foo16()1639717992 b\'\\x05\' b\'\\xd3*\\xa7\\x0c\' _sym_external 21 foo21()3542787852 b\'\\x05\' b\'\\x18iG\\xd9\' _sym_external 19 foo19()409552857 b\'\\x05\' b\'\\n\\xf1\\xf9\\x7f\' _sym_external 18 foo18()183630207 b\'\\x05\' b\')\\xda\\xd7`\' _sym_external 27 foo27()702207840 b\'\\x05\' b\'2\\xf6\\xaa\\xda\' _sym_external 12 foo12()855026394 b\'\\x05\' b\'\\xbe\\xb5\\x05\\xf5\' _sym_external 15 foo15()3199534581 b\'\\x05\' b\'\\xfc\\xa7_\\xe6\' _sym_external 8 foo8()4238827494 b\'\\x05\' b\'\\x1b\\x12C8\' _sym_external 26 foo26()454181688 b\'\\x05\' } }""" # noqa: E501, FS003 + expected_asm = """{ DATA BUCKET_HEADERS b\'\\x0bB\' LABEL bucket_0 b\'\\n\' b\'+\\x8d\' LABEL bucket_1 b\'\\x0c\' b\'\\x00\\x85\' LABEL bucket_2 b\'\\x08\' } { DATA bucket_1 b\'\\xd8\\xee\\xa1\\xe8\' LABEL external 6 foo6()3639517672 b\'\\x05\' b\'\\xd2\\x9e\\xe0\\xf9\' LABEL external 0 foo0()3533627641 b\'\\x05\' b\'\\x05\\xf1\\xe0_\' LABEL external 2 foo2()99737695 b\'\\x05\' b\'\\x91\\t\\xb4{\' LABEL external 23 foo23()2433332347 b\'\\x05\' b\'np3\\x7f\' LABEL external 11 foo11()1852846975 b\'\\x05\' b\'&\\xf5\\x96\\xf9\' LABEL external 13 foo13()653629177 b\'\\x05\' b\'\\x04ga\\xeb\' LABEL external 14 foo14()73884139 b\'\\x05\' b\'\\x89\\x06\\xad\\xc6\' LABEL external 17 foo17()2298916294 b\'\\x05\' b\'\\xe4%\\xac\\xd1\' LABEL external 4 foo4()3827674321 b\'\\x05\' b\'yj\\x01\\xac\' LABEL external 7 foo7()2036990380 b\'\\x05\' b\'\\xf1\\xe6K\\xe5\' LABEL external 29 foo29()4058401765 b\'\\x05\' b\'\\xd2\\x89X\\xb8\' LABEL external 3 foo3()3532216504 b\'\\x05\' } { DATA bucket_2 b\'\\x06p\\xffj\' LABEL external 25 foo25()108068714 b\'\\x05\' 
b\'\\x964\\x99I\' LABEL external 24 foo24()2520029513 b\'\\x05\' b\'s\\x81\\xe7\\xc1\' LABEL external 10 foo10()1937893313 b\'\\x05\' b\'\\x85\\xad\\xc11\' LABEL external 28 foo28()2242756913 b\'\\x05\' b\'\\xfa"\\xb1\\xed\' LABEL external 5 foo5()4196577773 b\'\\x05\' b\'A\\xe7[\\x05\' LABEL external 22 foo22()1105681157 b\'\\x05\' b\'\\xd3\\x89U\\xe8\' LABEL external 1 foo1()3548993000 b\'\\x05\' b\'hL\\xf8\\xf3\' LABEL external 20 foo20()1749874931 b\'\\x05\' } { DATA bucket_0 b\'\\xee\\xd9\\x1d\\xe3\' LABEL external 9 foo9()4007206371 b\'\\x05\' b\'a\\xbc\\x1ch\' LABEL external 16 foo16()1639717992 b\'\\x05\' b\'\\xd3*\\xa7\\x0c\' LABEL external 21 foo21()3542787852 b\'\\x05\' b\'\\x18iG\\xd9\' LABEL external 19 foo19()409552857 b\'\\x05\' b\'\\n\\xf1\\xf9\\x7f\' LABEL external 18 foo18()183630207 b\'\\x05\' b\')\\xda\\xd7`\' LABEL external 27 foo27()702207840 b\'\\x05\' b\'2\\xf6\\xaa\\xda\' LABEL external 12 foo12()855026394 b\'\\x05\' b\'\\xbe\\xb5\\x05\\xf5\' LABEL external 15 foo15()3199534581 b\'\\x05\' b\'\\xfc\\xa7_\\xe6\' LABEL external 8 foo8()4238827494 b\'\\x05\' b\'\\x1b\\x12C8\' LABEL external 26 foo26()454181688 b\'\\x05\' } }""" # noqa: E501, FS003 assert expected_asm in output["asm"] From 6a661abfff756f35206a38863b7e697f49e45e60 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 5 May 2025 18:32:50 +0200 Subject: [PATCH 018/172] venom_to_assembly fixes --- vyper/venom/venom_to_assembly.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 4635fe60f8..f1223cfdd8 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -132,7 +132,7 @@ def _ofst(label: str | Label, value: int) -> list[Any]: pushlabel = label # _mem_foo is still magic else: pushlabel = PUSHLABEL(label) - return [*PUSH(value), label, "ADD"] + return [pushlabel, *PUSH(value), "ADD"] # TODO: "assembly" gets into the recursion due to how the original @@ 
-585,7 +585,7 @@ def _generate_evm_for_instruction( assembly.extend(["ISZERO", PUSHLABEL(Label("revert")), "JUMPI"]) elif opcode == "assert_unreachable": end_symbol = mksymbol("reachable") - assembly.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"]) + assembly.extend([PUSHLABEL(end_symbol), "JUMPI", "INVALID", end_symbol]) elif opcode == "iload": addr = inst.operands[0] if isinstance(addr, IRLiteral): From 69f5be1983d25eedbe6dc75bb7151a0e63720f61 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 6 May 2025 09:40:59 +0200 Subject: [PATCH 019/172] comment --- vyper/ir/compile_ir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 942eda8308..b1cba85da2 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1381,7 +1381,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) ret.extend(bytecode) elif is_ofst(item): - # _OFST _sym_foo 32 + # _OFST (LABEL foo) 32 + # _OFST _mem_foo 32 ofst = symbol_map[assembly[i + 1]] + assembly[i + 2] n = mem_ofst_size if is_mem_sym(assembly[i + 1]) else SYMBOL_SIZE bytecode, _ = assembly_to_evm(PUSH_N(ofst, n)) From c152600357d1d65ed2e9f7adb68657825a486a77 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 6 May 2025 10:39:31 +0200 Subject: [PATCH 020/172] comment update --- vyper/ir/compile_ir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index b1cba85da2..787bef5341 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -129,10 +129,10 @@ def _runtime_code_offsets(ctor_mem_size, runtime_codelen): # of the runtime code. # after the ctor has run but before copying runtime code to # memory, the layout is - # ... | data section + # | | # and after copying runtime code to memory (immediately before # returning the runtime code): - # ... 
| data section + # | |<- | # since the ctor memory variables and runtime code overlap, # we start allocating the data section from # `max(ctor_mem_size, runtime_code_size)` From 4dea535df57159b9627eed9ba5643bc843235977 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 7 May 2025 10:55:47 +0200 Subject: [PATCH 021/172] add PUSH_OFST --- vyper/ir/compile_ir.py | 72 +++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 787bef5341..89f5093a13 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -89,6 +89,29 @@ def __eq__(self, other): def __hash__(self): return hash(self.label) +class PUSH_OFST: + def __init__(self, label: Label | str, ofst: int): + # label can be Label or (temporarily) str, until + # we clean up mem_syms. + assert isinstance(label, (Label, str)) + self.label = label + self.ofst = ofst + + def __repr__(self): + label = self.label + if isinstance(label, Label): + label = label.label # str + return f"PUSH_OFST({label}, {self.ofst})" + + def __eq__(self, other): + if not isinstance(other, PUSH_OFST): + return False + return self.label == other.label and self.ofst == other.ofst + + def __hash__(self): + return hash((self.label, self.ofst)) + + def mksymbol(name=""): Label._next_symbol += 1 @@ -117,8 +140,8 @@ def is_mem_sym(i): return isinstance(i, str) and i.startswith("_mem_") -def is_ofst(sym): - return isinstance(sym, str) and sym == "_OFST" +def is_ofst(assembly_item): + return isinstance(assembly_item, PUSH_OFST) def _runtime_code_offsets(ctor_mem_size, runtime_codelen): @@ -286,24 +309,23 @@ def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=N raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}") def _data_ofst_of(sym, ofst, height_): - # e.g. _OFST Label foo 32 + # e.g. 
PUSHOFST foo 32 assert is_symbol(sym) or is_mem_sym(sym), sym - # simple way -- reintroduce compile-time resolution later - ofst = _compile_to_assembly(ofst, withargs, existing_labels, break_dest, height_) + if isinstance(ofst.value, int): + # resolve at compile time using magic PUSH_OFST op + return [PUSH_OFST(sym, ofst.value)] + if is_symbol(sym): - return ofst + [PUSHLABEL(sym), "ADD"] + pushsym = PUSHLABEL(sym) else: # magic for mem syms - return ofst + [sym, "ADD"] + assert is_mem_sym(sym) # clarity + pushsym = sym - if isinstance(ofst.value, int): - # resolve at compile time using magic _OFST op - return ["_OFST", sym, ofst.value] - else: - # if we can't resolve at compile time, resolve at runtime - ofst = _compile_to_assembly(ofst, withargs, existing_labels, break_dest, height_) - return ofst + [sym, "ADD"] + # if we can't resolve at compile time, resolve at runtime + ofst = _compile_to_assembly(ofst, withargs, existing_labels, break_dest, height_) + return ofst + [pushsym, "ADD"] def _height_of(witharg): ret = height - withargs[witharg] @@ -578,7 +600,7 @@ def _height_of(witharg): o.extend([PUSHLABEL(Label("subcode_size")), PUSHLABEL(runtime_begin), "_mem_deploy_start", "CODECOPY"]) # calculate the len of runtime code - o.extend(_data_ofst_of(Label("subcode_size"), IRnode(immutables_len), height)) + o.extend(_data_ofst_of(Label("subcode_size"), IRnode(immutables_len), height)) # stack: len o.extend(["_mem_deploy_start"]) # stack: len mem_ofst o.extend(["RETURN"]) @@ -1257,8 +1279,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) ) assert runtime_code_end - runtime_code_start == len(runtime_code) - if is_ofst(item) and is_mem_sym(assembly[i + 1]): - max_mem_ofst = max(assembly[i + 2], max_mem_ofst) + if is_ofst(item) and is_mem_sym(item.label): + max_mem_ofst = max(item.ofst, max_mem_ofst) if runtime_code_end is not None: mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst) @@ -1302,10 +1324,10 @@ def 
assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # PUSH item pc += mem_ofst_size + 1 elif is_ofst(item): - assert is_symbol(assembly[i + 1]) or is_mem_sym(assembly[i + 1]), assembly[i + 1] - assert isinstance(assembly[i + 2], int) - # [_OFST, _sym_foo, bar] -> PUSH2 (foo+bar) - # [_OFST, _mem_foo, bar] -> PUSHN (foo+bar) + assert is_symbol(item.label) or is_mem_sym(item.label), item.label + assert isinstance(item.ofst, int), item + # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar) + # [PUSH_OFST, _mem_foo, bar] -> PUSHN (foo+bar) pc -= 1 elif isinstance(item, list) and isinstance(item[0], RuntimeHeader): # we are in initcode @@ -1381,10 +1403,10 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) ret.extend(bytecode) elif is_ofst(item): - # _OFST (LABEL foo) 32 - # _OFST _mem_foo 32 - ofst = symbol_map[assembly[i + 1]] + assembly[i + 2] - n = mem_ofst_size if is_mem_sym(assembly[i + 1]) else SYMBOL_SIZE + # PUSH_OFST (LABEL foo) 32 + # PUSH_OFST _mem_foo 32 + ofst = symbol_map[item.label] + item.ofst + n = mem_ofst_size if is_mem_sym(item.label) else SYMBOL_SIZE bytecode, _ = assembly_to_evm(PUSH_N(ofst, n)) ret.extend(bytecode) to_skip = 2 From 2624aa2c2c93a2006c0b628137a527b71bc72a41 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 7 May 2025 11:04:23 +0200 Subject: [PATCH 022/172] comment --- vyper/ir/compile_ir.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 89f5093a13..0069799008 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -133,9 +133,9 @@ def is_symbol(i): return isinstance(i, Label) -# basically something like a symbol which gets resolved -# during assembly, but requires 4 bytes of space. -# (should only happen in deploy code) +# basically a pointer but like a symbol in that it gets resolved +# during assembly, but requires up to 4 bytes of space. 
+# (should only happen in initcode) def is_mem_sym(i): return isinstance(i, str) and i.startswith("_mem_") From 952ef29509bb959feab7361b39fcb50704e4ea41 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 7 May 2025 11:08:51 +0200 Subject: [PATCH 023/172] fix push_ofst --- vyper/ir/compile_ir.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 0069799008..056be86cd9 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1328,7 +1328,10 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) assert isinstance(item.ofst, int), item # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar) # [PUSH_OFST, _mem_foo, bar] -> PUSHN (foo+bar) - pc -= 1 + if is_symbol(item.label): + pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits + else: + pc += mem_ofst_size + 1 elif isinstance(item, list) and isinstance(item[0], RuntimeHeader): # we are in initcode symbol_map[item[0].label] = pc @@ -1380,11 +1383,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # now that all symbols have been resolved, generate bytecode # using the symbol map - to_skip = 0 for i, item in enumerate(assembly): - if to_skip > 0: - to_skip -= 1 - continue if item in ("DEBUG",): continue # skippable opcodes @@ -1399,6 +1398,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) ret.append(get_opcodes()["JUMPDEST"][0]) elif is_mem_sym(item): + # TODO: use something like PUSH_MEM_SYM(?) for these. 
bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) ret.extend(bytecode) @@ -1409,7 +1409,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) n = mem_ofst_size if is_mem_sym(item.label) else SYMBOL_SIZE bytecode, _ = assembly_to_evm(PUSH_N(ofst, n)) ret.extend(bytecode) - to_skip = 2 elif isinstance(item, int): ret.append(item) From 52e65c42c525b3193117cbde6eaca74182ecadce Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 7 May 2025 11:16:49 +0200 Subject: [PATCH 024/172] push_ofst in venom_to_assembly --- vyper/venom/venom_to_assembly.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index f1223cfdd8..e16264fc71 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -3,6 +3,7 @@ from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.ir.compile_ir import ( PUSH, + PUSH_OFST, is_mem_sym, PUSHLABEL, DataHeader, @@ -128,12 +129,8 @@ def _as_asm_symbol(label: IRLabel) -> Label: return Label(label.value) def _ofst(label: str | Label, value: int) -> list[Any]: - if isinstance(label, str) and is_mem_sym(label): - pushlabel = label # _mem_foo is still magic - else: - pushlabel = PUSHLABEL(label) - return [pushlabel, *PUSH(value), "ADD"] - + # resolve at compile time using magic PUSH_OFST op + return [PUSH_OFST(label, value)] # TODO: "assembly" gets into the recursion due to how the original # IR was structured recursively in regards with the deploy instruction. 
From 6a8183eec7e1fc7d604c56f573e946bec133c0c7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 7 May 2025 11:18:05 +0200 Subject: [PATCH 025/172] remove jumpdest instruction --- vyper/venom/venom_to_assembly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index e16264fc71..a644b0de15 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -111,7 +111,7 @@ ] ) -_REVERT_POSTAMBLE = [Label("revert"), "JUMPDEST", *PUSH(0), "DUP1", "REVERT"] +_REVERT_POSTAMBLE = [Label("revert"), *PUSH(0), "DUP1", "REVERT"] def apply_line_numbers(inst: IRInstruction, asm) -> list[str]: From d6864feb3b5e5b2aff4773c8b7c0b6bb13a1d31c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 7 May 2025 11:26:28 +0200 Subject: [PATCH 026/172] update comment --- vyper/ir/compile_ir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 056be86cd9..cac5a1cb28 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -152,10 +152,10 @@ def _runtime_code_offsets(ctor_mem_size, runtime_codelen): # of the runtime code. 
# after the ctor has run but before copying runtime code to # memory, the layout is - # | | + # | | # and after copying runtime code to memory (immediately before # returning the runtime code): - # | |<- | + # | | # since the ctor memory variables and runtime code overlap, # we start allocating the data section from # `max(ctor_mem_size, runtime_code_size)` From a0c48c0d38f69081abe3873b3f3b0e1957628c36 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 8 May 2025 18:04:18 +0200 Subject: [PATCH 027/172] wip - refactor IRnode to assembly into a class --- vyper/codegen/core.py | 2 +- vyper/codegen/module.py | 1 - vyper/compiler/phases.py | 2 - vyper/ir/compile_ir.py | 1308 +++++++++++++++--------------- vyper/venom/venom_to_assembly.py | 34 +- 5 files changed, 662 insertions(+), 685 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index ea51eda832..9bfae529a3 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -910,7 +910,7 @@ def FAIL(): # pragma: no cover _label = 0 -# TODO might want to coalesce with Context.fresh_varname and compile_ir.mksymbol +# TODO might want to coalesce with Context.fresh_varname def _freshname(name): global _label _label += 1 diff --git a/vyper/codegen/module.py b/vyper/codegen/module.py index 63c6a8f488..56a8da0f79 100644 --- a/vyper/codegen/module.py +++ b/vyper/codegen/module.py @@ -12,7 +12,6 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import _is_debug_mode from vyper.exceptions import CompilerPanic -from vyper.ir.compile_ir import Label from vyper.semantics.types.module import ModuleT from vyper.utils import OrderedSet, method_id_int diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index d79ab319b3..2daa0ec966 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -12,7 +12,6 @@ from vyper.compiler.input_bundle import FileInput, FilesystemInputBundle, InputBundle from vyper.compiler.settings import OptimizationLevel, Settings, 
anchor_settings, merge_settings from vyper.ir import compile_ir, optimizer -from vyper.ir.compile_ir import reset_symbols from vyper.semantics import analyze_module, set_data_positions, validate_compilation_target from vyper.semantics.analysis.data_positions import generate_layout_export from vyper.semantics.analysis.imports import resolve_imports @@ -320,7 +319,6 @@ def generate_ir_nodes(global_ctx: ModuleT, settings: Settings) -> tuple[IRnode, """ # make IR output the same between runs codegen.reset_names() - reset_symbols() with anchor_settings(settings): ir_nodes, ir_runtime = module.generate_ir_for_module(global_ctx) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index cac5a1cb28..1c6826eca0 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1,7 +1,7 @@ from __future__ import annotations +import contextlib import copy -import functools import math from dataclasses import dataclass @@ -46,17 +46,12 @@ def PUSH_N(x, n): return [f"PUSH{len(o)}"] + o -def JUMP(label: Label): - return [PUSHLABEL(label), "JUMP"] - - -def JUMPI(label: Label): - return [PUSHLABEL(label), "JUMPI"] +##################################### +# assembly data structures and utils +##################################### class Label: - _next_symbol: int = 0 - def __init__(self, label: str): assert isinstance(label, str) self.label = label @@ -73,6 +68,22 @@ def __hash__(self): return hash(self.label) +class CONST: + def __init__(self, name: str, value: int): + assert isinstance(name, str) + assert isinstance(value, int) + self.name = name + self.value = value + + def __repr__(self): + return f"CONST {self.name} {self.value}" + + def __eq__(self, other): + if not isinstance(other, CONST): + return False + return self.name == other.name and self.value == other.value + + class PUSHLABEL: def __init__(self, label: Label): assert isinstance(label, Label) @@ -89,6 +100,8 @@ def __eq__(self, other): def __hash__(self): return hash(self.label) + +# push the result of an 
addition (which might be resolvable at compile-time) class PUSH_OFST: def __init__(self, label: Label | str, ofst: int): # label can be Label or (temporarily) str, until @@ -111,20 +124,28 @@ def __eq__(self, other): def __hash__(self): return hash((self.label, self.ofst)) +class DATA_ITEM: + def __init__(self, item: bytes | Label): + self.data = item + def __repr__(self): + if isinstance(self.item, bytes): + return "DATABYTES {self.item}" + elif isinstance(self.item, Label): + return "DATALABEL {self.item.label}" -def mksymbol(name=""): - Label._next_symbol += 1 - - return Label(f"{name}{Label._next_symbol}") +def JUMP(label: Label): + return [PUSHLABEL(label), "JUMP"] -def reset_symbols(): - Label._next_symbol = 0 +def JUMPI(label: Label): + return [PUSHLABEL(label), "JUMPI"] def mkdebug(pc_debugger, ast_source): - i = Instruction("DEBUG", ast_source) + # compile debug instructions + # (this is dead code -- CMC 2025-05-08) + i = TaggedInstruction("DEBUG", ast_source) i.pc_debugger = pc_debugger return [i] @@ -211,645 +232,634 @@ def _rewrite_return_sequences(ir_node, label_params=None): _rewrite_return_sequences(t, label_params) -def _assert_false(): - global _revert_label - # use a shared failure block for common case of assert(x). 
- # in the future we might want to change the code - # at _sym_revert0 to: INVALID - return JUMPI(_revert_label) - - -def _add_postambles(asm_ops): - to_append = [] +# a string (assembly instruction) but with additional metadata from the source code +class TaggedInstruction(str): + def __new__(cls, sstr, *args, **kwargs): + return super().__new__(cls, sstr) - global _revert_label + def __init__(self, sstr, ast_source=None, error_msg=None): + self.error_msg = error_msg + self.pc_debugger = False - _revert_string = [_revert_label, *PUSH(0), "DUP1", "REVERT"] + self.ast_source = ast_source - if PUSHLABEL(_revert_label) in asm_ops: - # shared failure block - to_append.extend(_revert_string) - if len(to_append) > 0: - # insert the postambles *before* runtime code - # so the data section of the runtime code can't bork the postambles. - runtime = None - if isinstance(asm_ops[-1], list) and isinstance(asm_ops[-1][0], RuntimeHeader): - runtime = asm_ops.pop() +############################## +# IRnode to assembly +############################## - # for some reason there might not be a STOP at the end of asm_ops. - # (generally vyper programs will have it but raw IR might not). - asm_ops.append("STOP") - asm_ops.extend(to_append) - if runtime: - asm_ops.append(runtime) +def compile_to_assembly(code, optimize=OptimizationLevel.GAS): + # don't overwrite ir since the original might need to be output, e.g. `-f ir,asm` + code = copy.deepcopy(code) + _rewrite_return_sequences(code) - # need to do this recursively since every sublist is basically - # treated as its own program (there are no global labels.) 
- for t in asm_ops: - if isinstance(t, list): - _add_postambles(t) + res = _IRnodeLowerer().compile_to_assembly(code) + if optimize != OptimizationLevel.NONE: + optimize_assembly(res) + return res -class Instruction(str): - def __new__(cls, sstr, *args, **kwargs): - return super().__new__(cls, sstr) - def __init__(self, sstr, ast_source=None, error_msg=None): - self.error_msg = error_msg - self.pc_debugger = False +AssemblyInstruction = str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST - self.ast_source = ast_source +class _IRnodeLowerer: + # map from variable names to height in stack + withargs: dict[str, int] -def apply_line_numbers(func): - @functools.wraps(func) - def apply_line_no_wrapper(*args, **kwargs): - code = args[0] - ret = func(*args, **kwargs) + # set of all existing labels + existing_labels: set[Label] - new_ret = [ - Instruction(i, code.ast_source, code.error_msg) - if isinstance(i, str) and not isinstance(i, Instruction) - else i - for i in ret - ] - return new_ret + # break destination when inside loops + # continue_dest, break_dest, height + break_dest: tuple[Label, Label, int] - return apply_line_no_wrapper + # current height in stack + height: int + code_instructions: list[AssemblyInstruction] + data_segments: list[DataSegment] -@apply_line_numbers -def compile_to_assembly(code, optimize=OptimizationLevel.GAS): - global _revert_label - _revert_label = mksymbol("revert") + def __init__(self, symbol_counter=0): + self.symbol_counter = symbol_counter - # don't overwrite ir since the original might need to be output, e.g. 
`-f ir,asm` - code = copy.deepcopy(code) - _rewrite_return_sequences(code) + def compile_to_assembly(self, code): + self.withargs = {} + self.existing_labels = set() + self.break_dest = None + self.height = 0 - res = _compile_to_assembly(code) + self.global_revert_label = None - _add_postambles(res) + self.data_segments = [] + self.freeze_data_segments = False - _relocate_segments(res) + return self._compile_r(code, height=0) - if optimize != OptimizationLevel.NONE: - optimize_assembly(res) - return res + @contextlib.contextmanager + def modify_breakdest(self, continue_dest: Label, exit_dest: Label, height: int): + tmp = self.break_dest + try: + self.break_dest = continue_dest, exit_dest, height + yield + finally: + self.break_dest = tmp + def mksymbol(self, name: str) -> Label: + self.symbol_counter += 1 -# Compiles IR to assembly -@apply_line_numbers -def _compile_to_assembly(code, withargs=None, existing_labels=None, break_dest=None, height=0): - if withargs is None: - withargs = {} - if not isinstance(withargs, dict): - raise CompilerPanic(f"Incorrect type for withargs: {type(withargs)}") + return Label(f"{name}_{self.symbol_counter}") - def _data_ofst_of(sym, ofst, height_): + def _data_ofst_of(self, symbol: str | Label, ofst: IRnode, height) -> list[AssemblyInstruction]: # e.g. 
PUSHOFST foo 32 - assert is_symbol(sym) or is_mem_sym(sym), sym + assert is_symbol(symbol) or is_mem_sym(symbol), symbol if isinstance(ofst.value, int): # resolve at compile time using magic PUSH_OFST op - return [PUSH_OFST(sym, ofst.value)] + return [PUSH_OFST(symbol, ofst.value)] - if is_symbol(sym): - pushsym = PUSHLABEL(sym) + if isinstance(symbol, Label): + pushsym = PUSHLABEL(symbol) else: # magic for mem syms - assert is_mem_sym(sym) # clarity - pushsym = sym + assert is_mem_sym(symbol) # clarity + pushsym = symbol # if we can't resolve at compile time, resolve at runtime - ofst = _compile_to_assembly(ofst, withargs, existing_labels, break_dest, height_) + ofst = self._compile_r(ofst, height) return ofst + [pushsym, "ADD"] - def _height_of(witharg): - ret = height - withargs[witharg] - if ret > 16: - raise Exception("With statement too deep") - return ret + def _compile_r(self, code: IRnode, height: int) -> list[AssemblyInstruction]: + asm = self._step_r(code, height) + for i, item in enumerate(asm): + if isinstance(item, str) and not isinstance(item, TaggedInstruction): + # CMC 2025-05-08 this is O(n^2).. 
:'( + asm[i] = TaggedInstruction(item, code.ast_source, code.error_msg) + + return asm + + def _step_r(self, code: IRnode, height: int) -> list[AssemblyInstruction]: + def _height_of(varname): + ret = height - self.withargs[varname] + if ret > 16: + raise Exception("With statement too deep") + return ret + + if isinstance(code.value, str) and code.value.upper() in get_opcodes(): + o = [] + for i, c in enumerate(reversed(code.args)): + o.extend(self._compile_r(c, height + i)) + o.append(code.value.upper()) + return o + + # Numbers + if isinstance(code.value, int): + if code.value < -(2**255): + raise Exception(f"Value too low: {code.value}") + elif code.value >= 2**256: + raise Exception(f"Value too high: {code.value}") + + return PUSH(code.value % 2**256) + + # Variables connected to with statements + if isinstance(code.value, str) and code.value in self.withargs: + return ["DUP" + str(_height_of(code.value))] + + # Setting variables connected to with statements + if code.value == "set": + if len(code.args) != 2 or code.args[0].value not in self.withargs: + raise Exception("Set expects two arguments, the first being a stack variable") + if height - self.withargs[code.args[0].value] > 16: + raise Exception("With statement too deep") + swap_instr = "SWAP" + str(height - self.withargs[code.args[0].value]) + return self._compile_r(code.args[1], height) + [swap_instr, "POP"] + + # Pass statements + # TODO remove "dummy"; no longer needed + if code.value in ("pass", "dummy"): + return [] + + # "mload" from data section of the currently executing code + if code.value == "dload": + loc = code.args[0] + + o = [] + # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE + o.extend(PUSH(32)) + + o.extend(self._data_ofst_of(Label("code_end"), loc, height)) + + o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"]) + o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"]) + return o + + # batch copy from data section of the currently executing code to 
memory + # (probably should have named this dcopy but oh well) + if code.value == "dloadbytes": + dst = code.args[0] + src = code.args[1] + len_ = code.args[2] + + o = [] + o.extend(self._compile_r(len_, height)) + o.extend(self._data_ofst_of(Label("code_end"), src, height + 1)) + o.extend(self._compile_r(dst, height + 2)) + o.extend(["CODECOPY"]) + return o + + # "mload" from the data section of (to-be-deployed) runtime code + if code.value == "iload": + loc = code.args[0] + + o = [] + o.extend(self._data_ofst_of("_mem_deploy_end", loc, height)) + o.append("MLOAD") + + return o + + # "mstore" to the data section of (to-be-deployed) runtime code + if code.value == "istore": + loc = code.args[0] + val = code.args[1] + + o = [] + o.extend(self._compile_r(val, height)) + o.extend(self._data_ofst_of("_mem_deploy_end", loc, height + 1)) + o.append("MSTORE") + + return o + + # batch copy from memory to the data section of runtime code + if code.value == "istorebytes": + raise Exception("unimplemented") + + # If statements (2 arguments, ie. if x: y) + if code.value == "if" and len(code.args) == 2: + o = [] + o.extend(self._compile_r(code.args[0], height)) + end_symbol = self.mksymbol("join") + o.extend(["ISZERO", *JUMPI(end_symbol)]) + o.extend(self._compile_r(code.args[1], height)) + o.extend([end_symbol]) + return o + + # If statements (3 arguments, ie. 
if x: y, else: z) + if code.value == "if" and len(code.args) == 3: + o = [] + o.extend(self._compile_r(code.args[0], height)) + mid_symbol = self.mksymbol("else") + end_symbol = self.mksymbol("join") + o.extend(["ISZERO", *JUMPI(mid_symbol)]) + o.extend(self._compile(code.args[1], height)) + o.extend([*JUMP(end_symbol), mid_symbol]) + o.extend(self._compile(code.args[2], height)) + o.extend([end_symbol]) + return o + + # repeat(counter_location, start, rounds, rounds_bound, body) + # basically a do-while loop: + # + # assert(rounds <= rounds_bound) + # if (rounds > 0) { + # do { + # body; + # } while (++i != start + rounds) + # } + if code.value == "repeat": + o = [] + if len(code.args) != 5: # pragma: nocover + raise CompilerPanic("bad number of repeat args") + + i_name = code.args[0] + start = code.args[1] + rounds = code.args[2] + rounds_bound = code.args[3] + body = code.args[4] + + entry_dest = self.mksymbol("loop_start") + continue_dest = self.mksymbol("loop_continue") + exit_dest = self.mksymbol("loop_exit") + + # stack: [] + o.extend(self._compile_r(start, height)) + + o.extend(self._compile_r(rounds, height + 1)) + + # stack: i + + # assert rounds <= round_bound + if rounds != rounds_bound: + # stack: i, rounds + o.extend(self._compile_r(rounds_bound, height + 2)) + # stack: i, rounds, rounds_bound + # assert 0 <= rounds <= rounds_bound (for rounds_bound < 2**255) + # TODO this runtime assertion shouldn't fail for + # internally generated repeats. 
+ o.extend(["DUP2", "GT"] + self._assert_false()) + + # stack: i, rounds + # if (0 == rounds) { goto end_dest; } + o.extend(["DUP1", "ISZERO", *JUMPI(exit_dest)]) + + # stack: start, rounds + if start.value != 0: + o.extend(["DUP2", "ADD"]) + + # stack: i, exit_i + o.extend(["SWAP1"]) + + if i_name.value in self.withargs: + raise CompilerPanic(f"shadowed loop variable {i_name}") + self.withargs[i_name.value] = height + 1 + + # stack: exit_i, i + o.extend([entry_dest]) + + with self.modify_breakdest(exit_dest, continue_dest, height + 2): + o.extend(self._compile_r(body, height + 2)) + + del withargs[i_name.value] + + # clean up any stack items left by body + o.extend(["POP"] * body.valency) + + # stack: exit_i, i + # increment i: + o.extend([continue_dest, "PUSH1", 1, "ADD"]) + + # stack: exit_i, i+1 (new_i) + # if (exit_i != new_i) { goto entry_dest } + o.extend(["DUP2", "DUP2", "XOR", *JUMPI(entry_dest)]) + o.extend([exit_dest, "POP", "POP"]) + + return o + + # Continue to the next iteration of the for loop + if code.value == "continue": + if not self.break_dest: + raise CompilerPanic("Invalid break") + _dest, continue_dest, _break_height = self.break_dest + return [*JUMP(continue_dest)] + + # Break from inside a for loop + if code.value == "break": + if not self.break_dest: + raise CompilerPanic("Invalid break") + dest, _continue_dest, break_height = self.break_dest + + n_local_vars = height - break_height + # clean up any stack items declared in the loop body + cleanup_local_vars = ["POP"] * n_local_vars + return cleanup_local_vars + [*JUMP(dest)] + + # Break from inside one or more for loops prior to a return statement inside the loop + if code.value == "cleanup_repeat": + if not self.break_dest: + raise CompilerPanic("Invalid break") + # clean up local vars and internal loop vars + _, _, break_height = self.break_dest + # except don't pop label params + if "return_buffer" in self.withargs: + break_height -= 1 + if "return_pc" in self.withargs: + break_height 
-= 1 + return ["POP"] * break_height + + # With statements + if code.value == "with": + o = [] + o.extend(self._compile_r(code.args[1], height)) + old = self.withargs.get(code.args[0].value, None) + self.withargs[code.args[0].value] = height + o.extend(self._compile_r(code.args[2], height + 1)) + if code.args[2].valency: + o.extend(["SWAP1", "POP"]) + else: + o.extend(["POP"]) + if old is not None: + self.withargs[code.args[0].value] = old + else: + del self.withargs[code.args[0].value] + return o - if existing_labels is None: - existing_labels = set() - if not isinstance(existing_labels, set): - raise CompilerPanic(f"must be set(), but got {type(existing_labels)}") - - # Opcodes - if isinstance(code.value, str) and code.value.upper() in get_opcodes(): - o = [] - for i, c in enumerate(code.args[::-1]): - o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - o.append(code.value.upper()) - return o - - # Numbers - elif isinstance(code.value, int): - if code.value < -(2**255): - raise Exception(f"Value too low: {code.value}") - elif code.value >= 2**256: - raise Exception(f"Value too high: {code.value}") - return PUSH(code.value % 2**256) - - # Variables connected to with statements - elif isinstance(code.value, str) and code.value in withargs: - return ["DUP" + str(_height_of(code.value))] - - # Setting variables connected to with statements - elif code.value == "set": - if len(code.args) != 2 or code.args[0].value not in withargs: - raise Exception("Set expects two arguments, the first being a stack variable") - if height - withargs[code.args[0].value] > 16: - raise Exception("With statement too deep") - return _compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height) + [ - "SWAP" + str(height - withargs[code.args[0].value]), - "POP", - ] - - # Pass statements - # TODO remove "dummy"; no longer needed - elif code.value in ("pass", "dummy"): - return [] - - # "mload" from data section of the currently executing 
code - elif code.value == "dload": - loc = code.args[0] - - o = [] - # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE - o.extend(PUSH(32)) - - o.extend(_data_ofst_of(Label("code_end"), loc, height + 1)) - o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"]) - o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"]) - return o - - # batch copy from data section of the currently executing code to memory - # (probably should have named this dcopy but oh well) - elif code.value == "dloadbytes": - dst = code.args[0] - src = code.args[1] - len_ = code.args[2] - - o = [] - o.extend(_compile_to_assembly(len_, withargs, existing_labels, break_dest, height)) - o.extend(_data_ofst_of(Label("code_end"), src, height + 1)) - o.extend(_compile_to_assembly(dst, withargs, existing_labels, break_dest, height + 2)) - o.extend(["CODECOPY"]) - return o - - # "mload" from the data section of (to-be-deployed) runtime code - elif code.value == "iload": - loc = code.args[0] - - o = [] - o.extend(_data_ofst_of("_mem_deploy_end", loc, height)) - o.append("MLOAD") - - return o - - # "mstore" to the data section of (to-be-deployed) runtime code - elif code.value == "istore": - loc = code.args[0] - val = code.args[1] - - o = [] - o.extend(_compile_to_assembly(val, withargs, existing_labels, break_dest, height)) - o.extend(_data_ofst_of("_mem_deploy_end", loc, height + 1)) - o.append("MSTORE") - - return o - - # batch copy from memory to the data section of runtime code - elif code.value == "istorebytes": - raise Exception("unimplemented") - - # If statements (2 arguments, ie. 
if x: y) - elif code.value == "if" and len(code.args) == 2: - o = [] - o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)) - end_symbol = mksymbol("join") - o.extend(["ISZERO", *JUMPI(end_symbol)]) - o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) - o.extend([end_symbol]) - return o - # If statements (3 arguments, ie. if x: y, else: z) - elif code.value == "if" and len(code.args) == 3: - o = [] - o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)) - mid_symbol = mksymbol("else") - end_symbol = mksymbol("join") - o.extend(["ISZERO", *JUMPI(mid_symbol)]) - o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) - o.extend([*JUMP(end_symbol), mid_symbol]) - o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height)) - o.extend([end_symbol]) - return o - - # repeat(counter_location, start, rounds, rounds_bound, body) - # basically a do-while loop: - # - # assert(rounds <= rounds_bound) - # if (rounds > 0) { - # do { - # body; - # } while (++i != start + rounds) - # } - elif code.value == "repeat": - o = [] - if len(code.args) != 5: # pragma: nocover - raise CompilerPanic("bad number of repeat args") - - i_name = code.args[0] - start = code.args[1] - rounds = code.args[2] - rounds_bound = code.args[3] - body = code.args[4] - - entry_dest, continue_dest, exit_dest = ( - mksymbol("loop_start"), - mksymbol("loop_continue"), - mksymbol("loop_exit"), - ) + # runtime statement (used to deploy runtime code) + elif code.value == "deploy": + memsize = code.args[0].value # used later to calculate _mem_deploy_start + ir = code.args[1] + immutables_len = code.args[2].value + assert isinstance(memsize, int), "non-int memsize" + assert isinstance(immutables_len, int), "non-int immutables_len" + + runtime_begin = Label("runtime_begin") + + o = [] + + # COPY the code to memory for deploy + 
o.extend( + [ + PUSHLABEL(Label("subcode_size")), + PUSHLABEL(runtime_begin), + "_mem_deploy_start", + "CODECOPY", + ] + ) + + # calculate the len of runtime code + o.extend( + self._data_ofst_of(Label("subcode_size"), IRnode(immutables_len), height) + ) # stack: len + o.extend(["_mem_deploy_start"]) # stack: len mem_ofst + o.extend(["RETURN"]) + + o.extend(self._create_postambles()) + + for data in self.data_segments: + o.extend(self._compile_data_segment(data)) + + self.freeze_data_segments = True + + o.append(CONST("ctor_mem_size", memsize)) + o.append(CONST("immutables_len", immutables_len)) + + runtime_assembly = _IRnodeLowerer().compile_to_assembly(ir) + runtime_bytecode, _ = assembly_to_evm(runtime_assembly) + + o.append(runtime_begin) + + o.append(DATA_ITEM(runtime_bytecode)) - # stack: [] - o.extend(_compile_to_assembly(start, withargs, existing_labels, break_dest, height)) + o.append(Label("runtime_end")) - o.extend(_compile_to_assembly(rounds, withargs, existing_labels, break_dest, height + 1)) + return o - # stack: i + # Seq (used to piece together multiple statements) + if code.value == "seq": + o = [] + for arg in code.args: + o.extend(self._compile_r(arg, height)) + if arg.valency == 1 and arg != code.args[-1]: + o.append("POP") + return o - # assert rounds <= round_bound - if rounds != rounds_bound: - # stack: i, rounds + # Seq without popping. + # unreachable keyword produces INVALID opcode + if code.value == "assert_unreachable": + o = self._compile_r(code.args[0], height) + end_symbol = self.mksymbol("reachable") + o.extend([*JUMPI(end_symbol), "INVALID", end_symbol]) + return o + + # Assert (if false, exit) + if code.value == "assert": + o = self._compile_r(code.args[0], height) + o.extend(["ISZERO"]) + o.extend(self._assert_false()) + return o + + # SHA3 a single value + if code.value == "sha3_32": + # TODO: this should not be emitted anymore. 
+ o = self._compile_r(code.args[0], height) o.extend( - _compile_to_assembly( - rounds_bound, withargs, existing_labels, break_dest, height + 2 - ) + [ + *PUSH(MemoryPositions.FREE_VAR_SPACE), + "MSTORE", + *PUSH(32), + *PUSH(MemoryPositions.FREE_VAR_SPACE), + "SHA3", + ] ) - # stack: i, rounds, rounds_bound - # assert 0 <= rounds <= rounds_bound (for rounds_bound < 2**255) - # TODO this runtime assertion shouldn't fail for - # internally generated repeats. - o.extend(["DUP2", "GT"] + _assert_false()) - - # stack: i, rounds - # if (0 == rounds) { goto end_dest; } - o.extend(["DUP1", "ISZERO", *JUMPI(exit_dest)]) - - # stack: start, rounds - if start.value != 0: - o.extend(["DUP2", "ADD"]) - - # stack: i, exit_i - o.extend(["SWAP1"]) - - if i_name.value in withargs: - raise CompilerPanic(f"shadowed loop variable {i_name}") - withargs[i_name.value] = height + 1 - - # stack: exit_i, i - o.extend([entry_dest]) - o.extend( - _compile_to_assembly( - body, withargs, existing_labels, (exit_dest, continue_dest, height + 2), height + 2 + return o + + # SHA3 a 64 byte value + if code.value == "sha3_64": + o = self._compile_r(code.args[0], height) + o.extend(self._compile_r(code.args[1], height + 1)) + o.extend( + [ + *PUSH(MemoryPositions.FREE_VAR_SPACE2), + "MSTORE", + *PUSH(MemoryPositions.FREE_VAR_SPACE), + "MSTORE", + *PUSH(64), + *PUSH(MemoryPositions.FREE_VAR_SPACE), + "SHA3", + ] ) - ) + return o + + if code.value == "select": + # b ^ ((a ^ b) * cond) where cond is 1 or 0 + # let t = a ^ b + cond = code.args[0] + a = code.args[1] + b = code.args[2] + + o = [] + o.extend(self._compile_r(b, height)) + o.extend(self._compile_r(a, height + 1)) + # stack: b a + o.extend(["DUP2", "XOR"]) + # stack: b t + o.extend(self._compile_r(cond, height + 2)) + # stack: b t cond + o.extend(["MUL", "XOR"]) + + # stack: b ^ (t * cond) + return o + + # <= operator + if code.value == "le": + expanded_ir = IRnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]) + return 
self._compile_r(expanded_ir, height) + + # >= operator + if code.value == "ge": + expanded_ir = IRnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]) + return self._compile_r(expanded_ir, height) + # <= operator + if code.value == "sle": + expanded_ir = IRnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]) + return self._compile_r(expanded_ir, height) + # >= operator + if code.value == "sge": + expanded_ir = IRnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]) + return self._compile_r(expanded_ir, height) + + # != operator + if code.value == "ne": + expanded_ir = IRnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]) + return self._compile_r(expanded_ir, height) + + # e.g. 95 -> 96, 96 -> 96, 97 -> 128 + if code.value == "ceil32": + # floor32(x) = x - x % 32 == x & 0b11..100000 == x & (~31) + # ceil32(x) = floor32(x + 31) == (x + 31) & (~31) + x = code.args[0] + expanded_ir = IRnode.from_list(["and", ["add", x, 31], ["not", 31]]) + return self._compile_r(expanded_ir, height) + + if code.value == "data": + data_node = [DataHeader(Label(code.args[0].value))] + + for c in code.args[1:]: + if isinstance(c.value, bytes): + data_node.append(DATA_ITEM(c.value)) + elif isinstance(c, IRnode): + assert c.value == "symbol" + assert len(c.args) == 1 + assert isinstance(c.args[0].value, str), (type(c.args[0].value), c) + data_node.append(DATA_ITEM(Label(c.args[0].value))) + else: + raise ValueError(f"Invalid data: {type(c)} {c}") + + self.data_segments.append(data_node) + return [] + + # jump to a symbol, and push variable # of arguments onto stack + if code.value == "goto": + o = [] + for i, c in enumerate(reversed(code.args[1:])): + o.extend(self._compile_r(c, height + i)) + o.extend([*JUMP(Label(code.args[0].value))]) + return o + + if code.value == "djump": + o = [] + # "djump" compiles to a raw EVM jump instruction + jump_target = code.args[0] + o.extend(self._compile_r(jump_target, height)) + o.append("JUMP") + return o + # 
push a literal symbol + if code.value == "symbol": + return [PUSHLABEL(Label(code.args[0].value))] + + # set a symbol as a location. + if code.value == "label": + label_name = code.args[0].value + assert isinstance(label_name, str) + + if label_name in self.existing_labels: + raise Exception(f"Label with name {label_name} already exists!") + else: + self.existing_labels.add(label_name) - del withargs[i_name.value] - - # clean up any stack items left by body - o.extend(["POP"] * body.valency) - - # stack: exit_i, i - # increment i: - o.extend([continue_dest, "PUSH1", 1, "ADD"]) - - # stack: exit_i, i+1 (new_i) - # if (exit_i != new_i) { goto entry_dest } - o.extend(["DUP2", "DUP2", "XOR", *JUMPI(entry_dest)]) - o.extend([exit_dest, "POP", "POP"]) - - return o - - # Continue to the next iteration of the for loop - elif code.value == "continue": - if not break_dest: - raise CompilerPanic("Invalid break") - dest, continue_dest, break_height = break_dest - return [*JUMP(continue_dest)] - # Break from inside a for loop - elif code.value == "break": - if not break_dest: - raise CompilerPanic("Invalid break") - dest, continue_dest, break_height = break_dest - - n_local_vars = height - break_height - # clean up any stack items declared in the loop body - cleanup_local_vars = ["POP"] * n_local_vars - return cleanup_local_vars + [*JUMP(dest)] - # Break from inside one or more for loops prior to a return statement inside the loop - elif code.value == "cleanup_repeat": - if not break_dest: - raise CompilerPanic("Invalid break") - # clean up local vars and internal loop vars - _, _, break_height = break_dest - # except don't pop label params - if "return_buffer" in withargs: - break_height -= 1 - if "return_pc" in withargs: - break_height -= 1 - return ["POP"] * break_height - # With statements - elif code.value == "with": - o = [] - o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) - old = withargs.get(code.args[0].value, None) - 
withargs[code.args[0].value] = height - o.extend( - _compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height + 1) - ) - if code.args[2].valency: - o.extend(["SWAP1", "POP"]) - else: - o.extend(["POP"]) - if old is not None: - withargs[code.args[0].value] = old - else: - del withargs[code.args[0].value] - return o - - # runtime statement (used to deploy runtime code) - elif code.value == "deploy": - memsize = code.args[0].value # used later to calculate _mem_deploy_start - ir = code.args[1] - immutables_len = code.args[2].value - assert isinstance(memsize, int), "non-int memsize" - assert isinstance(immutables_len, int), "non-int immutables_len" - - runtime_begin = mksymbol("runtime_begin") - - subcode = _compile_to_assembly(ir) - - o = [] - - # COPY the code to memory for deploy - o.extend([PUSHLABEL(Label("subcode_size")), PUSHLABEL(runtime_begin), "_mem_deploy_start", "CODECOPY"]) - - # calculate the len of runtime code - o.extend(_data_ofst_of(Label("subcode_size"), IRnode(immutables_len), height)) # stack: len - o.extend(["_mem_deploy_start"]) # stack: len mem_ofst - o.extend(["RETURN"]) - - # since the asm data structures are very primitive, to make sure - # assembly_to_evm is able to calculate data offsets correctly, - # we pass the memsize via magic opcodes to the subcode - subcode = [RuntimeHeader(runtime_begin, memsize, immutables_len)] + subcode - - # append the runtime code after the ctor code - # `append(...)` call here is intentional. - # each sublist is essentially its own program with its - # own symbols. - # in the later step when the "ir" block compiled to EVM, - # symbols in subcode are resolved to position from start of - # runtime-code (instead of position from start of bytecode). 
- o.append(subcode) - - return o - - # Seq (used to piece together multiple statements) - elif code.value == "seq": - o = [] - for arg in code.args: - o.extend(_compile_to_assembly(arg, withargs, existing_labels, break_dest, height)) - if arg.valency == 1 and arg != code.args[-1]: - o.append("POP") - return o - # Seq without popping. - # unreachable keyword produces INVALID opcode - elif code.value == "assert_unreachable": - o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height) - end_symbol = mksymbol("reachable") - o.extend([*JUMPI(end_symbol), "INVALID", end_symbol]) - return o - # Assert (if false, exit) - elif code.value == "assert": - o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height) - o.extend(["ISZERO"]) - o.extend(_assert_false()) - return o - - # SHA3 a single value - elif code.value == "sha3_32": - o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height) - o.extend( - [ - *PUSH(MemoryPositions.FREE_VAR_SPACE), - "MSTORE", - *PUSH(32), - *PUSH(MemoryPositions.FREE_VAR_SPACE), - "SHA3", - ] - ) - return o - # SHA3 a 64 byte value - elif code.value == "sha3_64": - o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height) - o.extend( - _compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height + 1) - ) - o.extend( - [ - *PUSH(MemoryPositions.FREE_VAR_SPACE2), - "MSTORE", - *PUSH(MemoryPositions.FREE_VAR_SPACE), - "MSTORE", - *PUSH(64), - *PUSH(MemoryPositions.FREE_VAR_SPACE), - "SHA3", - ] - ) - return o - elif code.value == "select": - # b ^ ((a ^ b) * cond) where cond is 1 or 0 - # let t = a ^ b - cond = code.args[0] - a = code.args[1] - b = code.args[2] - - o = [] - o.extend(_compile_to_assembly(b, withargs, existing_labels, break_dest, height)) - o.extend(_compile_to_assembly(a, withargs, existing_labels, break_dest, height + 1)) - # stack: b a - o.extend(["DUP2", "XOR"]) - # stack: b t - 
o.extend(_compile_to_assembly(cond, withargs, existing_labels, break_dest, height + 2)) - # stack: b t cond - o.extend(["MUL", "XOR"]) - - # stack: b ^ (t * cond) - return o - - # <= operator - elif code.value == "le": - return _compile_to_assembly( - IRnode.from_list(["iszero", ["gt", code.args[0], code.args[1]]]), - withargs, - existing_labels, - break_dest, - height, - ) - # >= operator - elif code.value == "ge": - return _compile_to_assembly( - IRnode.from_list(["iszero", ["lt", code.args[0], code.args[1]]]), - withargs, - existing_labels, - break_dest, - height, - ) - # <= operator - elif code.value == "sle": - return _compile_to_assembly( - IRnode.from_list(["iszero", ["sgt", code.args[0], code.args[1]]]), - withargs, - existing_labels, - break_dest, - height, - ) - # >= operator - elif code.value == "sge": - return _compile_to_assembly( - IRnode.from_list(["iszero", ["slt", code.args[0], code.args[1]]]), - withargs, - existing_labels, - break_dest, - height, - ) - # != operator - elif code.value == "ne": - return _compile_to_assembly( - IRnode.from_list(["iszero", ["eq", code.args[0], code.args[1]]]), - withargs, - existing_labels, - break_dest, - height, - ) + if code.args[1].value != "var_list": + raise CodegenPanic("2nd arg to label must be var_list") + var_args = code.args[1].args - # e.g. 
95 -> 96, 96 -> 96, 97 -> 128 - elif code.value == "ceil32": - # floor32(x) = x - x % 32 == x & 0b11..100000 == x & (~31) - # ceil32(x) = floor32(x + 31) == (x + 31) & (~31) - x = code.args[0] - return _compile_to_assembly( - IRnode.from_list(["and", ["add", x, 31], ["not", 31]]), - withargs, - existing_labels, - break_dest, - height, - ) + body = code.args[2] + + # new scope + height = 0 + old_withargs = self.withargs + + self.withargs = {} + + for arg in reversed(var_args): + assert isinstance(arg.value, str) # sanity + self.withargs[arg.value] = height + height += 1 + + body_asm = self._compile_r(body, height) + # pop_scoped_vars = ["POP"] * height + # for now, _rewrite_return_sequences forces + # label params to be consumed implicitly + pop_scoped_vars: list = [] - elif code.value == "data": - data_node = [DataHeader(Label(code.args[0].value))] - - for c in code.args[1:]: - if isinstance(c.value, int): - assert 0 <= c < 256, f"invalid data byte {c}" - data_node.append(c.value) - elif isinstance(c.value, bytes): - data_node.append(c.value) - elif isinstance(c, IRnode): - assert c.value == "symbol" - assert len(c.args) == 1 - assert isinstance(c.args[0].value, str), (type(c.args[0].value), c) - data_node.append(Label(c.args[0].value)) + self.withargs = old_withargs + + return [Label(label_name)] + body_asm + pop_scoped_vars + + if code.value == "unique_symbol": + symbol = code.args[0].value + assert isinstance(symbol, str) + + if symbol in self.existing_labels: + raise Exception(f"symbol {symbol} already exists!") else: - raise ValueError(f"Invalid data: {type(c)} {c}") - - # intentionally return a sublist. 
- return [data_node] - - # jump to a symbol, and push variable # of arguments onto stack - elif code.value == "goto": - o = [] - for i, c in enumerate(reversed(code.args[1:])): - o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - o.extend([*JUMP(Label(code.args[0].value))]) - return o - elif code.value == "djump": - o = [] - # "djump" compiles to a raw EVM jump instruction - jump_target = code.args[0] - o.extend( - _compile_to_assembly(jump_target, withargs, existing_labels, break_dest, height) - ) - o.append("JUMP") - return o - # push a literal symbol - elif code.value == "symbol": - return [PUSHLABEL(Label(code.args[0].value))] - # set a symbol as a location. - elif code.value == "label": - label_name = code.args[0].value - assert isinstance(label_name, str) - - if label_name in existing_labels: - raise Exception(f"Label with name {label_name} already exists!") - else: - existing_labels.add(label_name) + self.existing_labels.add(symbol) - if code.args[1].value != "var_list": - raise CodegenPanic("2nd arg to label must be var_list") - var_args = code.args[1].args + return [] - body = code.args[2] + if code.value == "exit_to": + # currently removed by _rewrite_return_sequences + raise CodegenPanic("exit_to not implemented yet!") - # new scope - height = 0 - withargs = {} + # inject debug opcode. + if code.value == "debugger": + return mkdebug(pc_debugger=False, ast_source=code.ast_source) - for arg in reversed(var_args): - assert isinstance( - arg.value, str - ) # already checked for higher up but only the paranoid survive - withargs[arg.value] = height - height += 1 + # inject debug opcode. 
+ if code.value == "pc_debugger": + return mkdebug(pc_debugger=True, ast_source=code.ast_source) - body_asm = _compile_to_assembly( - body, withargs=withargs, existing_labels=existing_labels, height=height - ) - # pop_scoped_vars = ["POP"] * height - # for now, _rewrite_return_sequences forces - # label params to be consumed implicitly - pop_scoped_vars = [] + raise CompilerPanic(f"invalid IRnode: {type(code)} {code}") # pragma: no cover - return [Label(label_name)] + body_asm + pop_scoped_vars + def _create_postambles(self): + ret = [] + # for some reason there might not be a STOP at the end of asm_ops. + # (generally vyper programs will have it but raw IR might not). + ret.append("STOP") - elif code.value == "unique_symbol": - symbol = code.args[0].value - assert isinstance(symbol, str) + # common revert block + if self.global_revert_label is not None: + ret.extend([self.global_revert_label, *PUSH(0), "DUP1", "REVERT"]) - if symbol in existing_labels: - raise Exception(f"symbol {symbol} already exists!") - else: - existing_labels.add(symbol) + return ret - return [] + def _assert_false(self): + if self.global_revert_label is None: + self.global_revert_label = self.mksymbol("revert") + # use a shared failure block for common case of assert(x). + return JUMPI(self.global_revert_label) - elif code.value == "exit_to": - raise CodegenPanic("exit_to not implemented yet!") - # inject debug opcode. - elif code.value == "debugger": - return mkdebug(pc_debugger=False, ast_source=code.ast_source) - # inject debug opcode. 
- elif code.value == "pc_debugger": - return mkdebug(pc_debugger=True, ast_source=code.ast_source) - else: # pragma: no cover - raise ValueError(f"Weird code element: {type(code)} {code}") +############################## +# assembly to evm utils +############################## def getpos(node): @@ -858,7 +868,7 @@ def getpos(node): def note_line_num(line_number_map, pc, item): # Record AST attached to pc - if isinstance(item, Instruction): + if isinstance(item, TaggedInstruction): if (ast_node := item.ast_source) is not None: ast_node = ast_node.get_original_node() if hasattr(ast_node, "node_id"): @@ -881,6 +891,10 @@ def note_breakpoint(line_number_map, pc, item): line_number_map["breakpoints"].add(item.lineno + 1) +############################## +# assembly optimizer +############################## + _TERMINAL_OPS = ("JUMP", "RETURN", "REVERT", "STOP", "INVALID") @@ -894,8 +908,7 @@ def _prune_unreachable_code(assembly): # find the next jumpdest or sublist for j in range(i + 1, len(assembly)): next_is_jumpdest = j < len(assembly) and is_symbol(assembly[j]) - next_is_list = isinstance(assembly[j], list) - if next_is_jumpdest or next_is_list: + if next_is_jumpdest: break else: # fixup an off-by-one if we made it to the end of the assembly @@ -964,7 +977,7 @@ def _merge_jumpdests(assembly): changed = False i = 0 while i < len(assembly) - 2: - #if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": + # if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": if is_symbol(assembly[i]): current_symbol = assembly[i] if is_symbol(assembly[i + 1]): @@ -973,7 +986,10 @@ def _merge_jumpdests(assembly): new_symbol = assembly[i + 1] if new_symbol != current_symbol: for j in range(len(assembly)): - if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: + if ( + isinstance(assembly[j], PUSHLABEL) + and assembly[j].label == current_symbol + ): assembly[j].label = new_symbol changed = True elif isinstance(assembly[i + 1], PUSHLABEL) and 
assembly[i + 2] == "JUMP": @@ -1111,10 +1127,6 @@ def _stack_peephole_opts(assembly): # optimize assembly, in place def optimize_assembly(assembly): - for x in assembly: - if isinstance(x, list) and isinstance(x[0], RuntimeHeader): - optimize_assembly(x) - for _ in range(1024): changed = False @@ -1187,11 +1199,9 @@ def _length_of_data(assembly): @dataclass class RuntimeHeader: label: Label - ctor_mem_size: int - immutables_len: int def __repr__(self): - return f"" + return f"" @dataclass @@ -1202,27 +1212,9 @@ def __repr__(self): return f"DATA {self.label.label}" -def _relocate_segments(assembly): - # relocate all data segments to the end, otherwise data could be - # interpreted as PUSH instructions and mangle otherwise valid jumpdests - # relocate all runtime segments to the end as well - data_segments = [] - non_data_segments = [] - code_segments = [] - for t in assembly: - if isinstance(t, list): - if isinstance(t[0], DataHeader): - data_segments.append(t) - else: - _relocate_segments(t) # recurse - assert isinstance(t[0], RuntimeHeader) - code_segments.append(t) - else: - non_data_segments.append(t) - assembly.clear() - assembly.extend(non_data_segments) - assembly.extend(code_segments) - assembly.extend(data_segments) +############################## +# assembly to evm bytecode +############################## # TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps @@ -1264,30 +1256,22 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # to do this, we first do a single pass to compile any runtime code # and use that to calculate mem_ofst_size. 
mem_ofst_size, ctor_mem_size = None, None - max_mem_ofst = 0 - for i, item in enumerate(assembly): - if isinstance(item, list) and isinstance(item[0], RuntimeHeader): - assert runtime_code is None, "Multiple subcodes" - - assert ctor_mem_size is None - ctor_mem_size = item[0].ctor_mem_size - runtime_code, runtime_map = assembly_to_evm(item[1:]) - - runtime_code_start, runtime_code_end = _runtime_code_offsets( - ctor_mem_size, len(runtime_code) - ) - assert runtime_code_end - runtime_code_start == len(runtime_code) + ## resolve constants + for item in assembly: + if isinstance(item, CONST): + # should this be merged into the symbol map? + const_map[item.name] = item.value + # find the maximum mem_ofst + max_mem_ofst = 0 + for item in assembly: if is_ofst(item) and is_mem_sym(item.label): max_mem_ofst = max(item.ofst, max_mem_ofst) if runtime_code_end is not None: mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst) - data_section_lengths = [] - immutables_len = None - # go through the code, resolving symbolic locations # (i.e. 
JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): @@ -1332,21 +1316,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits else: pc += mem_ofst_size + 1 - elif isinstance(item, list) and isinstance(item[0], RuntimeHeader): - # we are in initcode - symbol_map[item[0].label] = pc - # add source map for all items in the runtime map - t = adjust_pc_maps(runtime_map, pc) - for key in line_number_map: - line_number_map[key].update(t[key]) - immutables_len = item[0].immutables_len - pc += len(runtime_code) - # grab lengths of data sections from the runtime - for t in item: - if isinstance(t, list) and isinstance(t[0], DataHeader): - data_section_lengths.append(_length_of_data(t)) - elif isinstance(item, list) and isinstance(item[0], DataHeader): + elif isinstance(item, DataHeader): symbol_map[item[0].label] = pc pc += _length_of_data(item) else: @@ -1356,6 +1327,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) if compiler_metadata is not None: # this will hold true when we are in initcode assert immutables_len is not None + immutables_len = symbol_map["immutables_len"] metadata = ( compiler_metadata, len(runtime_code), @@ -1374,6 +1346,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) symbol_map[Label("code_end")] = pc symbol_map["_mem_deploy_start"] = runtime_code_start symbol_map["_mem_deploy_end"] = runtime_code_end + if runtime_code is not None: symbol_map[Label("subcode_size")] = len(runtime_code) @@ -1383,8 +1356,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # now that all symbols have been resolved, generate bytecode # using the symbol map - for i, item in enumerate(assembly): - + for item in assembly: if item in ("DEBUG",): continue # skippable opcodes @@ -1404,7 +1376,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, 
compiler_metadata=None) elif is_ofst(item): # PUSH_OFST (LABEL foo) 32 - # PUSH_OFST _mem_foo 32 + # PUSH_OFST (const foo) 32 ofst = symbol_map[item.label] + item.ofst n = mem_ofst_size if is_mem_sym(item.label) else SYMBOL_SIZE bytecode, _ = assembly_to_evm(PUSH_N(ofst, n)) @@ -1420,10 +1392,14 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) ret.append(DUP_OFFSET + int(item[3:])) elif item[:4] == "SWAP": ret.append(SWAP_OFFSET + int(item[4:])) - elif isinstance(item, list) and isinstance(item[0], RuntimeHeader): - ret.extend(runtime_code) - elif isinstance(item, list) and isinstance(item[0], DataHeader): - ret.extend(_data_to_evm(item, symbol_map)) + elif isinstance(item, DATA_ITEM): + if isinstance(item.data, bytes): + ret.extend(item.data) + elif isinstance(item.data, Label): + symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") + ret.extend(symbolbytes) + else: + raise CompilerPanic("Invalid data {type(item.data)}, {item.data}") else: # pragma: no cover # unreachable raise ValueError(f"Weird symbol in assembly: {type(item)} {item}") diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index a644b0de15..9098b7285b 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -4,13 +4,12 @@ from vyper.ir.compile_ir import ( PUSH, PUSH_OFST, - is_mem_sym, PUSHLABEL, DataHeader, - Instruction, Label, RuntimeHeader, - mksymbol, + TaggedInstruction, + is_mem_sym, optimize_assembly, ) from vyper.utils import MemoryPositions, OrderedSet, wrap256 @@ -117,8 +116,8 @@ def apply_line_numbers(inst: IRInstruction, asm) -> list[str]: ret = [] for op in asm: - if isinstance(op, str) and not isinstance(op, Instruction): - ret.append(Instruction(op, inst.ast_source, inst.error_msg)) + if isinstance(op, str) and not isinstance(op, TaggedInstruction): + ret.append(TaggedInstruction(op, inst.ast_source, inst.error_msg)) else: ret.append(op) return ret # type: ignore @@ -128,10 
+127,12 @@ def _as_asm_symbol(label: IRLabel) -> Label: # Lower an IRLabel to an assembly symbol return Label(label.value) + def _ofst(label: str | Label, value: int) -> list[Any]: # resolve at compile time using magic PUSH_OFST op return [PUSH_OFST(label, value)] + # TODO: "assembly" gets into the recursion due to how the original # IR was structured recursively in regards with the deploy instruction. # There, recursing into the deploy instruction was by design, and @@ -153,6 +154,10 @@ def __init__(self, ctxs: list[IRContext]): self.label_counter = 0 self.visited_basicblocks = OrderedSet() + def mklabel(self, name: str) -> Label: + self.label_counter += 1 + return f"{name}_{self.label_counter}" + def generate_evm(self, no_optimize: bool = False) -> list[str]: self.visited_basicblocks = OrderedSet() self.label_counter = 0 @@ -177,7 +182,12 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]: asm.extend([Label("ctor_exit")]) if ctx.immutables_len is not None and ctx.ctor_mem_size is not None: asm.extend( - [PUSHLABEL(Label("subcode_size")), PUSHLABEL(Label("runtime_begin")), "_mem_deploy_start", "CODECOPY"] + [ + PUSHLABEL(Label("subcode_size")), + PUSHLABEL(Label("runtime_begin")), + "_mem_deploy_start", + "CODECOPY", + ] ) asm.extend(_ofst(Label("subcode_size"), ctx.immutables_len)) # stack: len asm.extend(["_mem_deploy_start"]) # stack: len mem_ofst @@ -546,16 +556,10 @@ def _generate_evm_for_instruction( assert isinstance( target, IRLabel ), f"invoke target must be a label (is ${type(target)} ${target})" - return_label = Label(f"label_ret_{self.label_counter}") + return_label = self.mklabel("return_label") assembly.extend( - [ - PUSHLABEL(return_label), - PUSHLABEL(_as_asm_symbol(target)), - "JUMP", - return_label, - ] + [PUSHLABEL(return_label), PUSHLABEL(_as_asm_symbol(target)), "JUMP", return_label] ) - self.label_counter += 1 elif opcode == "ret": assembly.append("JUMP") elif opcode == "return": @@ -581,7 +585,7 @@ def 
_generate_evm_for_instruction( elif opcode == "assert": assembly.extend(["ISZERO", PUSHLABEL(Label("revert")), "JUMPI"]) elif opcode == "assert_unreachable": - end_symbol = mksymbol("reachable") + end_symbol = self.mklabel("reachable") assembly.extend([PUSHLABEL(end_symbol), "JUMPI", "INVALID", end_symbol]) elif opcode == "iload": addr = inst.operands[0] From d3303465e66654431cea41e96e499b212a401178 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 9 May 2025 16:10:51 +0200 Subject: [PATCH 028/172] more progress --- vyper/ir/compile_ir.py | 51 +++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 1c6826eca0..e5ecb4414e 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -591,25 +591,31 @@ def _height_of(varname): assert isinstance(memsize, int), "non-int memsize" assert isinstance(immutables_len, int), "non-int immutables_len" - runtime_begin = Label("runtime_begin") + runtime_assembly = _IRnodeLowerer().compile_to_assembly(ir) + + runtime_bytecode, _ = assembly_to_evm(runtime_assembly) + runtime_begin = Label("runtime_begin") o = [] + runtime_codesize = len(runtime_bytecode) + + mem_deploy_start, mem_deploy_end = _runtime_code_offsets(memsize, runtime_codesize) + # COPY the code to memory for deploy o.extend( [ - PUSHLABEL(Label("subcode_size")), + *PUSH(runtime_codesize), PUSHLABEL(runtime_begin), - "_mem_deploy_start", + *PUSH(mem_deploy_start), "CODECOPY", ] ) - # calculate the len of runtime code - o.extend( - self._data_ofst_of(Label("subcode_size"), IRnode(immutables_len), height) - ) # stack: len - o.extend(["_mem_deploy_start"]) # stack: len mem_ofst + # calculate the len of runtime code + immutables size + amount_to_return = runtime_codesize + immutables_len + o.extend(*PUSH(amount_to_return)) # stack: len + o.extend(*PUSH(mem_deploy_start)) # stack: len mem_ofst o.extend(["RETURN"]) o.extend(self._create_postambles()) @@ 
-619,16 +625,18 @@ def _height_of(varname): self.freeze_data_segments = True + # TODO: these two probably not needed o.append(CONST("ctor_mem_size", memsize)) o.append(CONST("immutables_len", immutables_len)) - runtime_assembly = _IRnodeLowerer().compile_to_assembly(ir) - runtime_bytecode, _ = assembly_to_evm(runtime_assembly) + o.append(CONST("mem_deploy_start", mem_deploy_start)) + o.append(CONST("mem_deploy_end", mem_deploy_end)) o.append(runtime_begin) o.append(DATA_ITEM(runtime_bytecode)) + # maybe not needed o.append(Label("runtime_end")) return o @@ -1248,30 +1256,12 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc = 0 symbol_map = {} - runtime_code, runtime_code_start, runtime_code_end = None, None, None - - # to optimize the size of deploy code - we want to use the smallest - # PUSH instruction possible which can support all memory symbols - # (and also works with linear pass symbol resolution) - # to do this, we first do a single pass to compile any runtime code - # and use that to calculate mem_ofst_size. - mem_ofst_size, ctor_mem_size = None, None - ## resolve constants for item in assembly: if isinstance(item, CONST): # should this be merged into the symbol map? const_map[item.name] = item.value - # find the maximum mem_ofst - max_mem_ofst = 0 - for item in assembly: - if is_ofst(item) and is_mem_sym(item.label): - max_mem_ofst = max(item.ofst, max_mem_ofst) - - if runtime_code_end is not None: - mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst) - # go through the code, resolving symbolic locations # (i.e. 
JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): @@ -1344,11 +1334,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc += len(bytecode_suffix) symbol_map[Label("code_end")] = pc - symbol_map["_mem_deploy_start"] = runtime_code_start - symbol_map["_mem_deploy_end"] = runtime_code_end - - if runtime_code is not None: - symbol_map[Label("subcode_size")] = len(runtime_code) # TODO refactor into two functions, create symbol_map and assemble From d0dda4c2953c373e4ef870e57da139bfdce2e077 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 9 May 2025 23:07:10 +0200 Subject: [PATCH 029/172] make it compile, add asm_runtime output --- vyper/compiler/__init__.py | 1 + vyper/compiler/output.py | 4 ++ vyper/ir/compile_ir.py | 142 +++++++++++++++++++++++-------------- 3 files changed, 93 insertions(+), 54 deletions(-) diff --git a/vyper/compiler/__init__.py b/vyper/compiler/__init__.py index 57bd2f4096..6e7322c7df 100644 --- a/vyper/compiler/__init__.py +++ b/vyper/compiler/__init__.py @@ -36,6 +36,7 @@ # requires assembly "abi": output.build_abi_output, "asm": output.build_asm_output, + "asm_runtime": output.build_asm_runtime_output, "source_map": output.build_source_map_output, "source_map_runtime": output.build_source_map_runtime_output, # requires bytecode diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 3ccab4869f..408784c625 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -316,6 +316,10 @@ def build_asm_output(compiler_data: CompilerData) -> str: return _build_asm(compiler_data.assembly) +def build_asm_runtime_output(compiler_data: CompilerData) -> str: + return _build_asm(compiler_data.assembly_runtime) + + def build_layout_output(compiler_data: CompilerData) -> StorageLayout: # in the future this might return (non-storage) layout, # for now only storage layout is returned. 
diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index e5ecb4414e..8abede6d85 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -68,6 +68,25 @@ def __hash__(self): return hash(self.label) +# this could be fused with Label, the only difference is if +# it gets looked up from const_map or symbol_map. +class CONSTREF: + def __init__(self, label: str): + assert isinstance(label, str) + self.label = label + + def __repr__(self): + return f"CONSTREF {self.label}" + + def __eq__(self, other): + if not isinstance(other, CONSTREF): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + + class CONST: def __init__(self, name: str, value: int): assert isinstance(name, str) @@ -103,10 +122,10 @@ def __hash__(self): # push the result of an addition (which might be resolvable at compile-time) class PUSH_OFST: - def __init__(self, label: Label | str, ofst: int): + def __init__(self, label: Label | CONSTREF, ofst: int): # label can be Label or (temporarily) str, until # we clean up mem_syms. - assert isinstance(label, (Label, str)) + assert isinstance(label, (Label, CONSTREF)) self.label = label self.ofst = ofst @@ -124,15 +143,17 @@ def __eq__(self, other): def __hash__(self): return hash((self.label, self.ofst)) + class DATA_ITEM: def __init__(self, item: bytes | Label): self.data = item def __repr__(self): - if isinstance(self.item, bytes): - return "DATABYTES {self.item}" - elif isinstance(self.item, Label): - return "DATALABEL {self.item.label}" + if isinstance(self.data, bytes): + return f"DATABYTES {self.data.hex()}" + elif isinstance(self.data, Label): + return f"DATALABEL {self.data.label}" + def JUMP(label: Label): return [PUSHLABEL(label), "JUMP"] @@ -190,8 +211,8 @@ def _runtime_code_offsets(ctor_mem_size, runtime_codelen): # Calculate the size of PUSH instruction we need to handle all # mem offsets in the code. For instance, if we only see mem symbols # up to size 256, we can use PUSH1. 
-def calc_mem_ofst_size(ctor_mem_size): - return math.ceil(math.log(ctor_mem_size + 1, 256)) +def calc_push_size(val: int): + return math.ceil(math.log(val + 1, 256)) + 1 # temporary optimization to handle stack items for return sequences @@ -279,7 +300,7 @@ class _IRnodeLowerer: height: int code_instructions: list[AssemblyInstruction] - data_segments: list[DataSegment] + data_segments: list # list[DataSegment] def __init__(self, symbol_counter=0): self.symbol_counter = symbol_counter @@ -295,7 +316,15 @@ def compile_to_assembly(self, code): self.data_segments = [] self.freeze_data_segments = False - return self._compile_r(code, height=0) + ret = self._compile_r(code, height=0) + + # append postambles before data segments + ret.extend(self._create_postambles()) + + for data in self.data_segments: + ret.extend(self._compile_data_segment(data)) + + return ret @contextlib.contextmanager def modify_breakdest(self, continue_dest: Label, exit_dest: Label, height: int): @@ -311,22 +340,25 @@ def mksymbol(self, name: str) -> Label: return Label(f"{name}_{self.symbol_counter}") - def _data_ofst_of(self, symbol: str | Label, ofst: IRnode, height) -> list[AssemblyInstruction]: + def _data_ofst_of( + self, symbol: Label | CONSTREF, ofst: IRnode, height: int + ) -> list[AssemblyInstruction]: # e.g. 
PUSHOFST foo 32 - assert is_symbol(symbol) or is_mem_sym(symbol), symbol + assert isinstance(symbol, (Label, CONSTREF)), symbol if isinstance(ofst.value, int): # resolve at compile time using magic PUSH_OFST op return [PUSH_OFST(symbol, ofst.value)] + # if we can't resolve at compile time, resolve at runtime if isinstance(symbol, Label): pushsym = PUSHLABEL(symbol) else: # magic for mem syms - assert is_mem_sym(symbol) # clarity - pushsym = symbol + assert isinstance(symbol, CONSTREF) # clarity + # we don't have a PUSHCONST instruction, use PUSH_OFST with ofst of 0 + pushsym = PUSH_OFST(symbol, 0) - # if we can't resolve at compile time, resolve at runtime ofst = self._compile_r(ofst, height) return ofst + [pushsym, "ADD"] @@ -413,7 +445,7 @@ def _height_of(varname): loc = code.args[0] o = [] - o.extend(self._data_ofst_of("_mem_deploy_end", loc, height)) + o.extend(self._data_ofst_of(CONSTREF("mem_deploy_end"), loc, height)) o.append("MLOAD") return o @@ -425,7 +457,7 @@ def _height_of(varname): o = [] o.extend(self._compile_r(val, height)) - o.extend(self._data_ofst_of("_mem_deploy_end", loc, height + 1)) + o.extend(self._data_ofst_of(CONSTREF("mem_deploy_end"), loc, height + 1)) o.append("MSTORE") return o @@ -584,8 +616,9 @@ def _height_of(varname): return o # runtime statement (used to deploy runtime code) - elif code.value == "deploy": - memsize = code.args[0].value # used later to calculate _mem_deploy_start + if code.value == "deploy": + # used to calculate where to copy the runtime code to memory + memsize = code.args[0].value ir = code.args[1] immutables_len = code.args[2].value assert isinstance(memsize, int), "non-int memsize" @@ -612,32 +645,21 @@ def _height_of(varname): ] ) - # calculate the len of runtime code + immutables size - amount_to_return = runtime_codesize + immutables_len - o.extend(*PUSH(amount_to_return)) # stack: len - o.extend(*PUSH(mem_deploy_start)) # stack: len mem_ofst - o.extend(["RETURN"]) - - o.extend(self._create_postambles()) - 
- for data in self.data_segments: - o.extend(self._compile_data_segment(data)) - - self.freeze_data_segments = True - # TODO: these two probably not needed - o.append(CONST("ctor_mem_size", memsize)) - o.append(CONST("immutables_len", immutables_len)) + # o.append(CONST("ctor_mem_size", memsize)) + # o.append(CONST("immutables_len", immutables_len)) o.append(CONST("mem_deploy_start", mem_deploy_start)) o.append(CONST("mem_deploy_end", mem_deploy_end)) - o.append(runtime_begin) + # calculate the len of runtime code + immutables size + amount_to_return = runtime_codesize + immutables_len + o.extend([*PUSH(amount_to_return)]) # stack: len + o.extend([*PUSH(mem_deploy_start)]) # stack: len mem_ofst - o.append(DATA_ITEM(runtime_bytecode)) + o.extend(["RETURN"]) - # maybe not needed - o.append(Label("runtime_end")) + self.data_segments.append([runtime_begin, DATA_ITEM(runtime_bytecode)]) return o @@ -858,6 +880,9 @@ def _create_postambles(self): return ret + def _compile_data_segment(self, segment: list): + return segment + def _assert_false(self): if self.global_revert_label is None: self.global_revert_label = self.mksymbol("revert") @@ -1255,12 +1280,13 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc = 0 symbol_map = {} + const_map = {} ## resolve constants for item in assembly: if isinstance(item, CONST): # should this be merged into the symbol map? - const_map[item.name] = item.value + const_map[CONSTREF(item.name)] = item.value # go through the code, resolving symbolic locations # (i.e. 
JUMPDEST locations) to actual code locations @@ -1294,18 +1320,18 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) if isinstance(item, PUSHLABEL): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits - elif is_mem_sym(item): - # PUSH item - pc += mem_ofst_size + 1 + elif is_ofst(item): - assert is_symbol(item.label) or is_mem_sym(item.label), item.label + assert isinstance(item.label, (Label, CONSTREF)) assert isinstance(item.ofst, int), item # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar) # [PUSH_OFST, _mem_foo, bar] -> PUSHN (foo+bar) if is_symbol(item.label): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits else: - pc += mem_ofst_size + 1 + const = const_map[item.label] + val = const + item.ofst + pc += calc_push_size(val) elif isinstance(item, DataHeader): symbol_map[item[0].label] = pc @@ -1314,7 +1340,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc += 1 bytecode_suffix = b"" - if compiler_metadata is not None: + if False: # TODO: bring this back, but in generating assembly. 
+ # if compiler_metadata is not None: # this will hold true when we are in initcode assert immutables_len is not None immutables_len = symbol_map["immutables_len"] @@ -1344,6 +1371,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) for item in assembly: if item in ("DEBUG",): continue # skippable opcodes + elif isinstance(item, CONST): + continue # CONST things do not show up in bytecode elif isinstance(item, PUSHLABEL): # push a symbol to stack @@ -1362,21 +1391,20 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif is_ofst(item): # PUSH_OFST (LABEL foo) 32 # PUSH_OFST (const foo) 32 - ofst = symbol_map[item.label] + item.ofst - n = mem_ofst_size if is_mem_sym(item.label) else SYMBOL_SIZE - bytecode, _ = assembly_to_evm(PUSH_N(ofst, n)) + if isinstance(item.label, Label): + ofst = symbol_map[item.label] + item.ofst + bytecode, _ = assembly_to_evm(PUSH_N(ofst, SYMBOL_SIZE)) + else: + assert isinstance(item.label, CONSTREF) + ofst = const_map[item.label] + item.ofst + bytecode, _ = assembly_to_evm(PUSH(ofst)) + ret.extend(bytecode) elif isinstance(item, int): ret.append(item) elif isinstance(item, str) and item.upper() in get_opcodes(): ret.append(get_opcodes()[item.upper()][0]) - elif item[:4] == "PUSH": - ret.append(PUSH_OFFSET + int(item[4:])) - elif item[:3] == "DUP": - ret.append(DUP_OFFSET + int(item[3:])) - elif item[:4] == "SWAP": - ret.append(SWAP_OFFSET + int(item[4:])) elif isinstance(item, DATA_ITEM): if isinstance(item.data, bytes): ret.extend(item.data) @@ -1385,6 +1413,12 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) ret.extend(symbolbytes) else: raise CompilerPanic("Invalid data {type(item.data)}, {item.data}") + elif item[:4] == "PUSH": + ret.append(PUSH_OFFSET + int(item[4:])) + elif item[:3] == "DUP": + ret.append(DUP_OFFSET + int(item[3:])) + elif item[:4] == "SWAP": + ret.append(SWAP_OFFSET + int(item[4:])) else: # pragma: no cover # 
unreachable raise ValueError(f"Weird symbol in assembly: {type(item)} {item}") From b877bb3d4b8bb133f6c46d6fd866221fea52bd0a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 11 May 2025 10:58:05 +0200 Subject: [PATCH 030/172] wip, remove RuntimeHeader and data_to_evm --- vyper/ir/compile_ir.py | 45 +++++++++++--------------------- vyper/venom/venom_to_assembly.py | 1 - 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 8abede6d85..f94b368e69 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -300,7 +300,7 @@ class _IRnodeLowerer: height: int code_instructions: list[AssemblyInstruction] - data_segments: list # list[DataSegment] + data_segments: list[DataSegment] def __init__(self, symbol_counter=0): self.symbol_counter = symbol_counter @@ -483,9 +483,9 @@ def _height_of(varname): mid_symbol = self.mksymbol("else") end_symbol = self.mksymbol("join") o.extend(["ISZERO", *JUMPI(mid_symbol)]) - o.extend(self._compile(code.args[1], height)) + o.extend(self._compile_r(code.args[1], height)) o.extend([*JUMP(end_symbol), mid_symbol]) - o.extend(self._compile(code.args[2], height)) + o.extend(self._compile_r(code.args[2], height)) o.extend([end_symbol]) return o @@ -551,7 +551,7 @@ def _height_of(varname): with self.modify_breakdest(exit_dest, continue_dest, height + 2): o.extend(self._compile_r(body, height + 2)) - del withargs[i_name.value] + del self.withargs[i_name.value] # clean up any stack items left by body o.extend(["POP"] * body.valency) @@ -1194,22 +1194,6 @@ def adjust_pc_maps(pc_maps, ofst): SYMBOL_SIZE = 2 # size of a PUSH instruction for a code symbol -def _data_to_evm(assembly, symbol_map): - ret = bytearray() - assert isinstance(assembly[0], DataHeader) - for item in assembly[1:]: - if is_symbol(item): - symbol = symbol_map[item].to_bytes(SYMBOL_SIZE, "big") - ret.extend(symbol) - elif isinstance(item, int): - ret.append(item) - elif isinstance(item, bytes): - 
ret.extend(item) - else: - raise ValueError(f"invalid data {type(item)} {item}") - - return ret - # predict what length of an assembly [data] node will be in bytecode def _length_of_data(assembly): @@ -1229,14 +1213,6 @@ def _length_of_data(assembly): return ret -@dataclass -class RuntimeHeader: - label: Label - - def __repr__(self): - return f"" - - @dataclass class DataHeader: label: Label @@ -1334,8 +1310,14 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) pc += calc_push_size(val) elif isinstance(item, DataHeader): - symbol_map[item[0].label] = pc - pc += _length_of_data(item) + symbol_map[item.label] = pc + #pc += _length_of_data(item) + elif isinstance(item, DATA_ITEM): + if isinstance(item.data, Label): + pc += SYMBOL_SIZE + else: + assert isinstance(item.data, bytes) + pc += len(item.data) else: pc += 1 @@ -1373,6 +1355,8 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) continue # skippable opcodes elif isinstance(item, CONST): continue # CONST things do not show up in bytecode + elif isinstance(item, DataHeader): + continue # DataHeader does not show up in bytecode elif isinstance(item, PUSHLABEL): # push a symbol to stack @@ -1383,6 +1367,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif isinstance(item, Label): ret.append(get_opcodes()["JUMPDEST"][0]) + elif is_mem_sym(item): # TODO: use something like PUSH_MEM_SYM(?) for these. 
bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 9098b7285b..368ef5c521 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -7,7 +7,6 @@ PUSHLABEL, DataHeader, Label, - RuntimeHeader, TaggedInstruction, is_mem_sym, optimize_assembly, From a984ee0514fd4ebd08cd9bcb4dabdc0811fef913 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 11 May 2025 11:16:30 +0200 Subject: [PATCH 031/172] wip, fix unreachable code eliminator --- vyper/ir/compile_ir.py | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index f94b368e69..51d5206429 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -179,6 +179,7 @@ def is_symbol(i): # during assembly, but requires up to 4 bytes of space. # (should only happen in initcode) def is_mem_sym(i): + # should be dead return isinstance(i, str) and i.startswith("_mem_") @@ -270,12 +271,13 @@ def __init__(self, sstr, ast_source=None, error_msg=None): ############################## +# external entry point to `IRnode.compile_to_assembly()` def compile_to_assembly(code, optimize=OptimizationLevel.GAS): - # don't overwrite ir since the original might need to be output, e.g. `-f ir,asm` + # don't mutate the ir since the original might need to be output, e.g. 
`-f ir,asm` code = copy.deepcopy(code) _rewrite_return_sequences(code) - res = _IRnodeLowerer().compile_to_assembly(code) + res = _IRnodeLowerer(optimize).compile_to_assembly(code) if optimize != OptimizationLevel.NONE: optimize_assembly(res) @@ -302,8 +304,12 @@ class _IRnodeLowerer: code_instructions: list[AssemblyInstruction] data_segments: list[DataSegment] - def __init__(self, symbol_counter=0): - self.symbol_counter = symbol_counter + optimize: OptimizationLevel + + symbol_counter: int = 0 + + def __init__(self, optimize: OptimizationLevel = OptimizationLevel.GAS): + self.optimize = optimize def compile_to_assembly(self, code): self.withargs = {} @@ -624,7 +630,10 @@ def _height_of(varname): assert isinstance(memsize, int), "non-int memsize" assert isinstance(immutables_len, int), "non-int immutables_len" - runtime_assembly = _IRnodeLowerer().compile_to_assembly(ir) + runtime_assembly = _IRnodeLowerer(self.optimize).compile_to_assembly(ir) + + if self.optimize != OptimizationLevel.NONE: + optimize_assembly(runtime_assembly) runtime_bytecode, _ = assembly_to_evm(runtime_assembly) @@ -938,10 +947,10 @@ def _prune_unreachable_code(assembly): i = 0 while i < len(assembly) - 1: if assembly[i] in _TERMINAL_OPS: - # find the next jumpdest or sublist + # find the next jumpdest or data section for j in range(i + 1, len(assembly)): - next_is_jumpdest = j < len(assembly) and is_symbol(assembly[j]) - if next_is_jumpdest: + next_is_reachable = isinstance(assembly[j], (Label, DataHeader)) + if next_is_reachable: break else: # fixup an off-by-one if we made it to the end of the assembly @@ -1089,20 +1098,18 @@ def _merge_iszero(assembly): def _prune_unused_jumpdests(assembly): changed = False - used_jumpdests = OrderedSet() + used_jumpdests: set[Label] = set() # find all used jumpdests - for i in range(len(assembly)): - if isinstance(assembly[i], PUSHLABEL): - used_jumpdests.add(assembly[i].label) - for item in assembly: - if isinstance(item, list) and isinstance(item[0], 
DataHeader): + if isinstance(item, PUSHLABEL): + used_jumpdests.add(item.label) + + if isinstance(item, DATA_ITEM) and isinstance(item.data, Label): # add symbols used in data sections as they are likely # used for a jumptable. - for t in item: - if is_symbol(t): - used_jumpdests.add(t) + used_jumpdests.add(item.data) + # delete jumpdests that aren't used i = 0 @@ -1369,6 +1376,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif is_mem_sym(item): + raise CompilerPanic("unreachable/dead code") # TODO: use something like PUSH_MEM_SYM(?) for these. bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) ret.extend(bytecode) From 013a41eb61b2c69d04650b31b74b269df92d459d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 11 May 2025 11:17:31 +0200 Subject: [PATCH 032/172] refactor: make runtime_begin a DataHeader --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 51d5206429..0653bb3539 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -668,7 +668,7 @@ def _height_of(varname): o.extend(["RETURN"]) - self.data_segments.append([runtime_begin, DATA_ITEM(runtime_bytecode)]) + self.data_segments.append([DataHeader(runtime_begin), DATA_ITEM(runtime_bytecode)]) return o From bf226aa33fc08380a270df7ef3929af49da90843 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 11 May 2025 11:27:33 +0200 Subject: [PATCH 033/172] update asm formatting --- vyper/compiler/output.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 408784c625..2be3d067de 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -329,24 +329,22 @@ def build_layout_output(compiler_data: CompilerData) -> StorageLayout: def _build_asm(asm_list): output_string = "" in_push = 0 - for node in asm_list: - if isinstance(node, list): 
- output_string += "{ " + _build_asm(node) + "} " + for item in asm_list: + if isinstance(item, (compile_ir.Label, compile_ir.DataHeader)): + output_string += f"\n\n{item}:" continue if in_push > 0: - assert isinstance(node, int), node - output_string += hex(node)[2:].rjust(2, "0") - if in_push == 1: - output_string += " " + assert isinstance(item, int), item + output_string += hex(item)[2:].rjust(2, "0") in_push -= 1 else: - output_string += str(node) + " " + output_string += f"\n {item}" - if isinstance(node, str) and node.startswith("PUSH") and node != "PUSH0": + if isinstance(item, str) and item.startswith("PUSH") and item != "PUSH0": assert in_push == 0 - in_push = int(node[4:]) - output_string += "0x" + in_push = int(item[4:]) + output_string += " 0x" return output_string From 8e0ee080eba9bcad86fa1ab086bdf2a3a6149b0e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 11 May 2025 11:28:03 +0200 Subject: [PATCH 034/172] formatting --- vyper/ir/compile_ir.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 0653bb3539..56c55c363d 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1110,7 +1110,6 @@ def _prune_unused_jumpdests(assembly): # used for a jumptable. 
used_jumpdests.add(item.data) - # delete jumpdests that aren't used i = 0 while i < len(assembly): @@ -1201,7 +1200,6 @@ def adjust_pc_maps(pc_maps, ofst): SYMBOL_SIZE = 2 # size of a PUSH instruction for a code symbol - # predict what length of an assembly [data] node will be in bytecode def _length_of_data(assembly): ret = 0 @@ -1318,7 +1316,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif isinstance(item, DataHeader): symbol_map[item.label] = pc - #pc += _length_of_data(item) + # pc += _length_of_data(item) elif isinstance(item, DATA_ITEM): if isinstance(item.data, Label): pc += SYMBOL_SIZE @@ -1374,7 +1372,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif isinstance(item, Label): ret.append(get_opcodes()["JUMPDEST"][0]) - elif is_mem_sym(item): raise CompilerPanic("unreachable/dead code") # TODO: use something like PUSH_MEM_SYM(?) for these. From 4f4c29f102d24e42fe0ea19c0b55588e19e472ae Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 11 May 2025 11:41:01 +0200 Subject: [PATCH 035/172] refactor cbor metadata suffix --- vyper/ir/compile_ir.py | 79 +++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 56c55c363d..894a0af841 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -272,12 +272,12 @@ def __init__(self, sstr, ast_source=None, error_msg=None): # external entry point to `IRnode.compile_to_assembly()` -def compile_to_assembly(code, optimize=OptimizationLevel.GAS): +def compile_to_assembly(code, optimize=OptimizationLevel.GAS, compiler_metadata=None): # don't mutate the ir since the original might need to be output, e.g. 
`-f ir,asm` code = copy.deepcopy(code) _rewrite_return_sequences(code) - res = _IRnodeLowerer(optimize).compile_to_assembly(code) + res = _IRnodeLowerer(optimize, compiler_metadata).compile_to_assembly(code) if optimize != OptimizationLevel.NONE: optimize_assembly(res) @@ -308,8 +308,9 @@ class _IRnodeLowerer: symbol_counter: int = 0 - def __init__(self, optimize: OptimizationLevel = OptimizationLevel.GAS): + def __init__(self, optimize: OptimizationLevel = OptimizationLevel.GAS, compiler_metadata=None): self.optimize = optimize + self.compiler_metadata=compiler_metadata def compile_to_assembly(self, code): self.withargs = {} @@ -630,11 +631,13 @@ def _height_of(varname): assert isinstance(memsize, int), "non-int memsize" assert isinstance(immutables_len, int), "non-int immutables_len" - runtime_assembly = _IRnodeLowerer(self.optimize).compile_to_assembly(ir) + runtime_assembly = _IRnodeLowerer(self.optimize, self.compiler_metadata).compile_to_assembly(ir) if self.optimize != OptimizationLevel.NONE: optimize_assembly(runtime_assembly) + runtime_data_segment_lengths = get_data_segment_lengths(runtime_assembly) + runtime_bytecode, _ = assembly_to_evm(runtime_assembly) runtime_begin = Label("runtime_begin") @@ -670,6 +673,25 @@ def _height_of(varname): self.data_segments.append([DataHeader(runtime_begin), DATA_ITEM(runtime_bytecode)]) + if self.compiler_metadata is not None: + # we should issue the cbor-encoded metadata. + metadata = ( + compiler_metadata, + runtime_codesize, + runtime_data_segment_lengths, + immutables_len, + {"vyper": version_tuple}, + ) + bytecode_suffix += cbor2.dumps(metadata) + # append the length of the footer, *including* the length + # of the length bytes themselves. 
+ suffix_len = len(bytecode_suffix) + 2 + bytecode_suffix += suffix_len.to_bytes(2, "big") + + segment = [DataHeader(Label("cbor_metadata"))] + segment.append(bytecode_suffix) + self.data_segments.append(segment) + return o # Seq (used to piece together multiple statements) @@ -1201,17 +1223,20 @@ def adjust_pc_maps(pc_maps, ofst): # predict what length of an assembly [data] node will be in bytecode -def _length_of_data(assembly): - ret = 0 - assert isinstance(assembly[0], DataHeader) - for item in assembly[1:]: - if is_symbol(item): - ret += SYMBOL_SIZE - elif isinstance(item, int): - assert 0 <= item < 256, f"invalid data byte {item}" - ret += 1 - elif isinstance(item, bytes): - ret += len(item) +def get_data_segment_lengths(assembly): + ret = [] + for item in assembly: + if isinstance(item, DataHeader): + ret.append(0) + continue + if len(ret) == 0: + # haven't yet seen a data header + continue + assert isinstance(item, DATA_ITEM) + if is_symbol(item.data): + ret[-1] += SYMBOL_SIZE + elif isinstance(item.data, bytes): + ret[-1] += len(item) else: raise ValueError(f"invalid data {type(item)} {item}") @@ -1316,7 +1341,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif isinstance(item, DataHeader): symbol_map[item.label] = pc - # pc += _length_of_data(item) elif isinstance(item, DATA_ITEM): if isinstance(item.data, Label): pc += SYMBOL_SIZE @@ -1326,27 +1350,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) else: pc += 1 - bytecode_suffix = b"" - if False: # TODO: bring this back, but in generating assembly. 
- # if compiler_metadata is not None: - # this will hold true when we are in initcode - assert immutables_len is not None - immutables_len = symbol_map["immutables_len"] - metadata = ( - compiler_metadata, - len(runtime_code), - data_section_lengths, - immutables_len, - {"vyper": version_tuple}, - ) - bytecode_suffix += cbor2.dumps(metadata) - # append the length of the footer, *including* the length - # of the length bytes themselves. - suffix_len = len(bytecode_suffix) + 2 - bytecode_suffix += suffix_len.to_bytes(2, "big") - - pc += len(bytecode_suffix) - symbol_map[Label("code_end")] = pc # TODO refactor into two functions, create symbol_map and assemble @@ -1413,8 +1416,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # unreachable raise ValueError(f"Weird symbol in assembly: {type(item)} {item}") - ret.extend(bytecode_suffix) - line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"]) return bytes(ret), line_number_map, symbol_map From 211a869b5359727d02cd73ba7244695ed0f8b963 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 11 May 2025 11:45:38 +0200 Subject: [PATCH 036/172] fix refactor --- vyper/compiler/phases.py | 19 ++++++++++--------- vyper/ir/compile_ir.py | 6 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 2daa0ec966..3b6005d756 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -257,6 +257,9 @@ def venom_functions(self): @cached_property def assembly(self) -> list: + if not self.no_bytecode_metadata: + metadata = bytes.fromhex(self.integrity_sum) + if self.settings.experimental_codegen: deploy_code, runtime_code = self.venom_functions assert self.settings.optimize is not None # mypy hint @@ -264,7 +267,7 @@ def assembly(self) -> list: runtime_code, deploy_code=deploy_code, optimize=self.settings.optimize ) else: - return 
generate_assembly(self.ir_nodes, self.settings.optimize) + return generate_assembly(self.ir_nodes, self.settings.optimize, compiler_metadata=metadata) @cached_property def assembly_runtime(self) -> list: @@ -278,13 +281,11 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: metadata = None - if not self.no_bytecode_metadata: - metadata = bytes.fromhex(self.integrity_sum) - return generate_bytecode(self.assembly, compiler_metadata=metadata) + return generate_bytecode(self.assembly) @cached_property def bytecode_runtime(self) -> bytes: - return generate_bytecode(self.assembly_runtime, compiler_metadata=None) + return generate_bytecode(self.assembly_runtime) @cached_property def blueprint_bytecode(self) -> bytes: @@ -328,7 +329,7 @@ def generate_ir_nodes(global_ctx: ModuleT, settings: Settings) -> tuple[IRnode, return ir_nodes, ir_runtime -def generate_assembly(ir_nodes: IRnode, optimize: Optional[OptimizationLevel] = None) -> list: +def generate_assembly(ir_nodes: IRnode, optimize: Optional[OptimizationLevel] = None, compiler_metadata: Optional[Any]=None) -> list: """ Generate assembly instructions from IR. @@ -343,7 +344,7 @@ def generate_assembly(ir_nodes: IRnode, optimize: Optional[OptimizationLevel] = List of assembly instructions. """ optimize = optimize or OptimizationLevel.default() - assembly = compile_ir.compile_to_assembly(ir_nodes, optimize=optimize) + assembly = compile_ir.compile_to_assembly(ir_nodes, optimize=optimize, compiler_metadata=compiler_metadata) if _find_nested_opcode(assembly, "DEBUG"): vyper_warn( @@ -363,7 +364,7 @@ def _find_nested_opcode(assembly, key): return any(_find_nested_opcode(x, key) for x in sublists) -def generate_bytecode(assembly: list, compiler_metadata: Optional[Any]) -> bytes: +def generate_bytecode(assembly: list) -> bytes: """ Generate bytecode from assembly instructions. 
@@ -377,4 +378,4 @@ def generate_bytecode(assembly: list, compiler_metadata: Optional[Any]) -> bytes bytes Final compiled bytecode. """ - return compile_ir.assembly_to_evm(assembly, compiler_metadata=compiler_metadata)[0] + return compile_ir.assembly_to_evm(assembly)[0] diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 894a0af841..e2f7ded37d 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -676,20 +676,20 @@ def _height_of(varname): if self.compiler_metadata is not None: # we should issue the cbor-encoded metadata. metadata = ( - compiler_metadata, + self.compiler_metadata, runtime_codesize, runtime_data_segment_lengths, immutables_len, {"vyper": version_tuple}, ) - bytecode_suffix += cbor2.dumps(metadata) + bytecode_suffix = cbor2.dumps(metadata) # append the length of the footer, *including* the length # of the length bytes themselves. suffix_len = len(bytecode_suffix) + 2 bytecode_suffix += suffix_len.to_bytes(2, "big") segment = [DataHeader(Label("cbor_metadata"))] - segment.append(bytecode_suffix) + segment.append(DATA_ITEM(bytecode_suffix)) self.data_segments.append(segment) return o From a84888bdfcef10a86390468001969853e666fb2e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 13:08:46 +0200 Subject: [PATCH 037/172] refactor: split assembly_to_evm from symbol_map construction --- vyper/ir/compile_ir.py | 236 +++++++++++++++++++++++------------------ 1 file changed, 131 insertions(+), 105 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index e2f7ded37d..8d826720f8 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1,5 +1,6 @@ from __future__ import annotations +from typing import TypeVar import contextlib import copy import math @@ -68,6 +69,14 @@ def __hash__(self): return hash(self.label) +@dataclass +class DataHeader: + label: Label + + def __repr__(self): + return f"DATA {self.label.label}" + + # this could be fused with Label, the only difference is if # it 
gets looked up from const_map or symbol_map. class CONSTREF: @@ -123,8 +132,7 @@ def __hash__(self): # push the result of an addition (which might be resolvable at compile-time) class PUSH_OFST: def __init__(self, label: Label | CONSTREF, ofst: int): - # label can be Label or (temporarily) str, until - # we clean up mem_syms. + # label can be Label or CONSTREF assert isinstance(label, (Label, CONSTREF)) self.label = label self.ofst = ofst @@ -175,13 +183,6 @@ def is_symbol(i): return isinstance(i, Label) -# basically a pointer but like a symbol in that it gets resolved -# during assembly, but requires up to 4 bytes of space. -# (should only happen in initcode) -def is_mem_sym(i): - # should be dead - return isinstance(i, str) and i.startswith("_mem_") - def is_ofst(assembly_item): return isinstance(assembly_item, PUSH_OFST) @@ -209,11 +210,17 @@ def _runtime_code_offsets(ctor_mem_size, runtime_codelen): return runtime_code_start, runtime_code_end -# Calculate the size of PUSH instruction we need to handle all -# mem offsets in the code. For instance, if we only see mem symbols -# up to size 256, we can use PUSH1. +# Calculate the size of PUSH instruction def calc_push_size(val: int): - return math.ceil(math.log(val + 1, 256)) + 1 + # stupid implementation. 
this is "slow", but its correctness is + # obvious verify, as opposed to + # ``` + # (val.bit_length() + 7) // 8 + # + (1 + # if (val > 0 or version_check(begin="shanghai")) + # else 0) + # ``` + return len(PUSH(val)) # temporary optimization to handle stack items for return sequences @@ -284,7 +291,7 @@ def compile_to_assembly(code, optimize=OptimizationLevel.GAS, compiler_metadata= return res -AssemblyInstruction = str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST +AssemblyInstruction = str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader class _IRnodeLowerer: @@ -381,7 +388,7 @@ def _compile_r(self, code: IRnode, height: int) -> list[AssemblyInstruction]: def _step_r(self, code: IRnode, height: int) -> list[AssemblyInstruction]: def _height_of(varname): ret = height - self.withargs[varname] - if ret > 16: + if ret > 16: # pragma: nocover raise Exception("With statement too deep") return ret @@ -394,9 +401,9 @@ def _height_of(varname): # Numbers if isinstance(code.value, int): - if code.value < -(2**255): + if code.value < -(2**255): # pragma: nocover raise Exception(f"Value too low: {code.value}") - elif code.value >= 2**256: + elif code.value >= 2**256: # pragma: nocover raise Exception(f"Value too high: {code.value}") return PUSH(code.value % 2**256) @@ -409,6 +416,7 @@ def _height_of(varname): if code.value == "set": if len(code.args) != 2 or code.args[0].value not in self.withargs: raise Exception("Set expects two arguments, the first being a stack variable") + # TODO: use _height_of if height - self.withargs[code.args[0].value] > 16: raise Exception("With statement too deep") swap_instr = "SWAP" + str(height - self.withargs[code.args[0].value]) @@ -548,7 +556,7 @@ def _height_of(varname): # stack: i, exit_i o.extend(["SWAP1"]) - if i_name.value in self.withargs: + if i_name.value in self.withargs: # pragma: nocover raise CompilerPanic(f"shadowed loop variable {i_name}") self.withargs[i_name.value] = height + 1 
@@ -576,14 +584,14 @@ def _height_of(varname): # Continue to the next iteration of the for loop if code.value == "continue": - if not self.break_dest: + if not self.break_dest: # pragma: nocover raise CompilerPanic("Invalid break") _dest, continue_dest, _break_height = self.break_dest return [*JUMP(continue_dest)] # Break from inside a for loop if code.value == "break": - if not self.break_dest: + if not self.break_dest: # pragma: nocover raise CompilerPanic("Invalid break") dest, _continue_dest, break_height = self.break_dest @@ -594,7 +602,7 @@ def _height_of(varname): # Break from inside one or more for loops prior to a return statement inside the loop if code.value == "cleanup_repeat": - if not self.break_dest: + if not self.break_dest: # pragma: nocover raise CompilerPanic("Invalid break") # clean up local vars and internal loop vars _, _, break_height = self.break_dest @@ -812,7 +820,7 @@ def _height_of(varname): assert len(c.args) == 1 assert isinstance(c.args[0].value, str), (type(c.args[0].value), c) data_node.append(DATA_ITEM(Label(c.args[0].value))) - else: + else: # pragma: nocover raise ValueError(f"Invalid data: {type(c)} {c}") self.data_segments.append(data_node) @@ -842,12 +850,12 @@ def _height_of(varname): label_name = code.args[0].value assert isinstance(label_name, str) - if label_name in self.existing_labels: + if label_name in self.existing_labels: # pragma: nocover raise Exception(f"Label with name {label_name} already exists!") else: self.existing_labels.add(label_name) - if code.args[1].value != "var_list": + if code.args[1].value != "var_list": # pragma: nocover raise CodegenPanic("2nd arg to label must be var_list") var_args = code.args[1].args @@ -878,7 +886,7 @@ def _height_of(varname): symbol = code.args[0].value assert isinstance(symbol, str) - if symbol in self.existing_labels: + if symbol in self.existing_labels: # pragma: nocover raise Exception(f"symbol {symbol} already exists!") else: self.existing_labels.add(symbol) @@ -911,7 
+919,7 @@ def _create_postambles(self): return ret - def _compile_data_segment(self, segment: list): + def _compile_data_segment(self, segment: list[AssemblyInstruction]) -> list[AssemblyInstruction]: return segment def _assert_false(self): @@ -926,6 +934,7 @@ def _assert_false(self): ############################## +# TODO: move this to some ast file or vyper/compiler/output.py def getpos(node): return (node.lineno, node.col_offset, node.end_lineno, node.end_col_offset) @@ -1205,20 +1214,6 @@ def optimize_assembly(assembly): raise CompilerPanic("infinite loop detected during assembly reduction") # pragma: nocover -def adjust_pc_maps(pc_maps, ofst): - assert ofst >= 0 - - ret = {} - # source breakpoints, don't need to modify - ret["breakpoints"] = pc_maps["breakpoints"].copy() - ret["pc_breakpoints"] = {pc + ofst for pc in pc_maps["pc_breakpoints"]} - ret["pc_jump_map"] = {k + ofst: v for (k, v) in pc_maps["pc_jump_map"].items()} - ret["pc_raw_ast_map"] = {k + ofst: v for (k, v) in pc_maps["pc_raw_ast_map"].items()} - ret["error_map"] = {k + ofst: v for (k, v) in pc_maps["error_map"].items()} - - return ret - - SYMBOL_SIZE = 2 # size of a PUSH instruction for a code symbol @@ -1237,69 +1232,84 @@ def get_data_segment_lengths(assembly): ret[-1] += SYMBOL_SIZE elif isinstance(item.data, bytes): ret[-1] += len(item) - else: + else: # pragma: nocover raise ValueError(f"invalid data {type(item)} {item}") return ret -@dataclass -class DataHeader: - label: Label - - def __repr__(self): - return f"DATA {self.label.label}" - ############################## # assembly to evm bytecode ############################## -# TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps -def assembly_to_evm(assembly, pc_ofst=0, compiler_metadata=None): - bytecode, source_maps, _ = assembly_to_evm_with_symbol_map( - assembly, pc_ofst=pc_ofst, compiler_metadata=compiler_metadata - ) - return bytecode, source_maps +def _compile_data_item(item: DATA_ITEM, symbol_map: 
dict[Label, int]) -> bytes: + if isinstance(item.data, bytes): + return item.data + if isinstance(item.data, Label): + symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") + return symbolbytes + + raise CompilerPanic("Invalid data {type(item.data)}, {item.data}") # pragma: nocover + +T = TypeVar("T") +def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int): + if item in symbol_map: # pragma: nocover + raise CompilerPanic(f"duplicate label: {label}") + symbol_map[item] = value -def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None): + + +def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[str, Any]]: """ - Assembles assembly into EVM + Generate bytecode and source map from assembly - assembly: list of asm instructions - pc_ofst: when constructing the source map, the amount to offset all - pcs by (no effect until we add deploy code source map) - compiler_metadata: any compiler metadata to add. pass `None` to indicate - no metadata to be added (should always be `None` for - runtime code). the value is opaque, and will be passed - directly to `cbor2.dumps()`. 
+ Returns: + bytecode: bytestring of the EVM bytecode + source_map: source map dict that gets output for the user """ - line_number_map = { - "breakpoints": set(), - "pc_breakpoints": set(), + # This API might seem a bit strange, but it's backwards compatible + symbol_map, const_map, source_map = make_symbol_map(assembly) + bytecode = _assembly_to_evm(assembly, symbol_map, const_map) + return bytecode, source_map + + +# resolve symbols in assembly +def make_symbol_map(assembly: list[AssemblyInstruction]) -> tuple[dict[Label,int], dict[CONSTREF, int], dict[str, Any]]: + """ + Construct symbol map from assembly list + + Returns: + symbol_map: dict from labels to values + const_map: dict from CONSTREFs to values + source_map: source map dict that gets output for the user + """ + source_map = { + "breakpoints": OrderedSet(), + "pc_breakpoints": OrderedSet(), "pc_jump_map": {0: "-"}, "pc_raw_ast_map": {}, "error_map": {}, } - pc = 0 - symbol_map = {} - const_map = {} + symbol_map: dict[Label, int] = {} + const_map: dict[CONSTREF, int] = {} - ## resolve constants + pc: int = 0 + + # resolve constants for item in assembly: if isinstance(item, CONST): # should this be merged into the symbol map? - const_map[CONSTREF(item.name)] = item.value + _add_to_symbol_map(const_map, CONSTREF(item.name), item.value) - # go through the code, resolving symbolic locations - # (i.e. JUMPDEST locations) to actual code locations + # resolve labels (i.e. JUMPDEST locations) to actual code locations, + # and simultaneously build the source map. 
for i, item in enumerate(assembly): - note_line_num(line_number_map, pc, item) - if item == "DEBUG": - continue # skip debug + # add it to the source map + note_line_num(source_map, pc, item) # update pc_jump_map if item == "JUMP": @@ -1307,24 +1317,31 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) if isinstance(last, PUSHLABEL) and last.label.label.startswith("internal"): if last.label.label.endswith("cleanup"): # exit an internal function - line_number_map["pc_jump_map"][pc] = "o" + source_map["pc_jump_map"][pc] = "o" else: # enter an internal function - line_number_map["pc_jump_map"][pc] = "i" + source_map["pc_jump_map"][pc] = "i" else: # everything else - line_number_map["pc_jump_map"][pc] = "-" + source_map["pc_jump_map"][pc] = "-" elif item in ("JUMPI", "JUMPDEST"): - line_number_map["pc_jump_map"][pc] = "-" + source_map["pc_jump_map"][pc] = "-" + + if item == "DEBUG": + continue # "debug" opcode does not go into bytecode + + if isinstance(item, CONST): + continue # CONST declarations do not go into bytecode # update pc - if is_symbol(item): - if item in symbol_map: - raise CompilerPanic(f"duplicate {item}") + if isinstance(item, Label): # Don't increment pc as the symbol itself doesn't go into code - symbol_map[item] = pc + _add_to_symbol_map(symbol_map, item, pc) - if isinstance(item, PUSHLABEL): + elif isinstance(item, DataHeader): + _add_to_symbol_map(symbol_map, item.label, pc) + + elif isinstance(item, PUSHLABEL): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits elif is_ofst(item): @@ -1339,21 +1356,44 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) val = const + item.ofst pc += calc_push_size(val) - elif isinstance(item, DataHeader): - symbol_map[item.label] = pc elif isinstance(item, DATA_ITEM): if isinstance(item.data, Label): pc += SYMBOL_SIZE else: assert isinstance(item.data, bytes) pc += len(item.data) + elif isinstance(item, int): + assert 0 <= item < 256 + pc += 1 else: + 
assert isinstance(item, str) and item in get_opcodes(), item pc += 1 - symbol_map[Label("code_end")] = pc + source_map["breakpoints"] = list(source_map["breakpoints"]) + source_map["pc_breakpoints"] = list(source_map["pc_breakpoints"]) + + # magic -- probably the assembler should actually add this label + _add_to_symbol_map(symbol_map, Label("code_end"), pc) + + return symbol_map, const_map, source_map + + +def _assembly_to_evm(assembly: list[AssemblyInstruction], symbol_map: dict[Label, int], const_map: dict[CONSTREF, int]) -> bytes: + """ + Assembles assembly into EVM bytecode + + assembly: list of asm instructions + symbol_map: dict from labels to resolved locations in the code + const_map: dict from constrefs to their values - # TODO refactor into two functions, create symbol_map and assemble + TODO: move this + compiler_metadata: any compiler metadata to add. pass `None` to indicate + no metadata to be added (should always be `None` for + runtime code). the value is opaque, and will be passed + directly to `cbor2.dumps()`. + Returns: bytes representing the bytecode + """ ret = bytearray() # now that all symbols have been resolved, generate bytecode @@ -1375,12 +1415,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif isinstance(item, Label): ret.append(get_opcodes()["JUMPDEST"][0]) - elif is_mem_sym(item): - raise CompilerPanic("unreachable/dead code") - # TODO: use something like PUSH_MEM_SYM(?) for these. 
- bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) - ret.extend(bytecode) - elif is_ofst(item): # PUSH_OFST (LABEL foo) 32 # PUSH_OFST (const foo) 32 @@ -1399,13 +1433,7 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) elif isinstance(item, str) and item.upper() in get_opcodes(): ret.append(get_opcodes()[item.upper()][0]) elif isinstance(item, DATA_ITEM): - if isinstance(item.data, bytes): - ret.extend(item.data) - elif isinstance(item.data, Label): - symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") - ret.extend(symbolbytes) - else: - raise CompilerPanic("Invalid data {type(item.data)}, {item.data}") + ret.extend(_compile_data_item(item, symbol_map)) elif item[:4] == "PUSH": ret.append(PUSH_OFFSET + int(item[4:])) elif item[:3] == "DUP": @@ -1416,6 +1444,4 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, compiler_metadata=None) # unreachable raise ValueError(f"Weird symbol in assembly: {type(item)} {item}") - line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) - line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"]) - return bytes(ret), line_number_map, symbol_map + return bytes(ret) From 2a1befaf94131e29d969706a6368da3662529436 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 13:12:23 +0200 Subject: [PATCH 038/172] begin fixing lint --- vyper/compiler/phases.py | 16 +++++++++---- vyper/ir/compile_ir.py | 52 ++++++++++++++++++++++++---------------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 3b6005d756..2fa294def8 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -257,6 +257,7 @@ def venom_functions(self): @cached_property def assembly(self) -> list: + metadata = None if not self.no_bytecode_metadata: metadata = bytes.fromhex(self.integrity_sum) @@ -267,7 +268,9 @@ def assembly(self) -> list: runtime_code, 
deploy_code=deploy_code, optimize=self.settings.optimize ) else: - return generate_assembly(self.ir_nodes, self.settings.optimize, compiler_metadata=metadata) + return generate_assembly( + self.ir_nodes, self.settings.optimize, compiler_metadata=metadata + ) @cached_property def assembly_runtime(self) -> list: @@ -280,7 +283,6 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: - metadata = None return generate_bytecode(self.assembly) @cached_property @@ -329,7 +331,11 @@ def generate_ir_nodes(global_ctx: ModuleT, settings: Settings) -> tuple[IRnode, return ir_nodes, ir_runtime -def generate_assembly(ir_nodes: IRnode, optimize: Optional[OptimizationLevel] = None, compiler_metadata: Optional[Any]=None) -> list: +def generate_assembly( + ir_nodes: IRnode, + optimize: Optional[OptimizationLevel] = None, + compiler_metadata: Optional[Any] = None, +) -> list: """ Generate assembly instructions from IR. @@ -344,7 +350,9 @@ def generate_assembly(ir_nodes: IRnode, optimize: Optional[OptimizationLevel] = List of assembly instructions. 
""" optimize = optimize or OptimizationLevel.default() - assembly = compile_ir.compile_to_assembly(ir_nodes, optimize=optimize, compiler_metadata=compiler_metadata) + assembly = compile_ir.compile_to_assembly( + ir_nodes, optimize=optimize, compiler_metadata=compiler_metadata + ) if _find_nested_opcode(assembly, "DEBUG"): vyper_warn( diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 8d826720f8..aa6720cd04 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1,10 +1,9 @@ from __future__ import annotations -from typing import TypeVar import contextlib import copy -import math from dataclasses import dataclass +from typing import Any, TypeVar import cbor2 @@ -183,7 +182,6 @@ def is_symbol(i): return isinstance(i, Label) - def is_ofst(assembly_item): return isinstance(assembly_item, PUSH_OFST) @@ -291,7 +289,9 @@ def compile_to_assembly(code, optimize=OptimizationLevel.GAS, compiler_metadata= return res -AssemblyInstruction = str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader +AssemblyInstruction = ( + str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader +) class _IRnodeLowerer: @@ -309,7 +309,7 @@ class _IRnodeLowerer: height: int code_instructions: list[AssemblyInstruction] - data_segments: list[DataSegment] + data_segments: list[AssemblyInstruction] optimize: OptimizationLevel @@ -317,7 +317,7 @@ class _IRnodeLowerer: def __init__(self, optimize: OptimizationLevel = OptimizationLevel.GAS, compiler_metadata=None): self.optimize = optimize - self.compiler_metadata=compiler_metadata + self.compiler_metadata = compiler_metadata def compile_to_assembly(self, code): self.withargs = {} @@ -556,7 +556,7 @@ def _height_of(varname): # stack: i, exit_i o.extend(["SWAP1"]) - if i_name.value in self.withargs: # pragma: nocover + if i_name.value in self.withargs: # pragma: nocover raise CompilerPanic(f"shadowed loop variable {i_name}") self.withargs[i_name.value] = height + 1 @@ 
-591,7 +591,7 @@ def _height_of(varname): # Break from inside a for loop if code.value == "break": - if not self.break_dest: # pragma: nocover + if not self.break_dest: # pragma: nocover raise CompilerPanic("Invalid break") dest, _continue_dest, break_height = self.break_dest @@ -602,7 +602,7 @@ def _height_of(varname): # Break from inside one or more for loops prior to a return statement inside the loop if code.value == "cleanup_repeat": - if not self.break_dest: # pragma: nocover + if not self.break_dest: # pragma: nocover raise CompilerPanic("Invalid break") # clean up local vars and internal loop vars _, _, break_height = self.break_dest @@ -639,7 +639,9 @@ def _height_of(varname): assert isinstance(memsize, int), "non-int memsize" assert isinstance(immutables_len, int), "non-int immutables_len" - runtime_assembly = _IRnodeLowerer(self.optimize, self.compiler_metadata).compile_to_assembly(ir) + runtime_assembly = _IRnodeLowerer( + self.optimize, self.compiler_metadata + ).compile_to_assembly(ir) if self.optimize != OptimizationLevel.NONE: optimize_assembly(runtime_assembly) @@ -820,7 +822,7 @@ def _height_of(varname): assert len(c.args) == 1 assert isinstance(c.args[0].value, str), (type(c.args[0].value), c) data_node.append(DATA_ITEM(Label(c.args[0].value))) - else: # pragma: nocover + else: # pragma: nocover raise ValueError(f"Invalid data: {type(c)} {c}") self.data_segments.append(data_node) @@ -850,12 +852,12 @@ def _height_of(varname): label_name = code.args[0].value assert isinstance(label_name, str) - if label_name in self.existing_labels: # pragma: nocover + if label_name in self.existing_labels: # pragma: nocover raise Exception(f"Label with name {label_name} already exists!") else: self.existing_labels.add(label_name) - if code.args[1].value != "var_list": # pragma: nocover + if code.args[1].value != "var_list": # pragma: nocover raise CodegenPanic("2nd arg to label must be var_list") var_args = code.args[1].args @@ -886,7 +888,7 @@ def 
_height_of(varname): symbol = code.args[0].value assert isinstance(symbol, str) - if symbol in self.existing_labels: # pragma: nocover + if symbol in self.existing_labels: # pragma: nocover raise Exception(f"symbol {symbol} already exists!") else: self.existing_labels.add(symbol) @@ -919,7 +921,9 @@ def _create_postambles(self): return ret - def _compile_data_segment(self, segment: list[AssemblyInstruction]) -> list[AssemblyInstruction]: + def _compile_data_segment( + self, segment: list[AssemblyInstruction] + ) -> list[AssemblyInstruction]: return segment def _assert_false(self): @@ -1238,7 +1242,6 @@ def get_data_segment_lengths(assembly): return ret - ############################## # assembly to evm bytecode ############################## @@ -1251,17 +1254,18 @@ def _compile_data_item(item: DATA_ITEM, symbol_map: dict[Label, int]) -> bytes: symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") return symbolbytes - raise CompilerPanic("Invalid data {type(item.data)}, {item.data}") # pragma: nocover + raise CompilerPanic(f"Invalid data {type(item.data)}, {item.data}") # pragma: nocover + T = TypeVar("T") + def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int): if item in symbol_map: # pragma: nocover - raise CompilerPanic(f"duplicate label: {label}") + raise CompilerPanic(f"duplicate label: {item}") symbol_map[item] = value - def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[str, Any]]: """ Generate bytecode and source map from assembly @@ -1277,7 +1281,9 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st # resolve symbols in assembly -def make_symbol_map(assembly: list[AssemblyInstruction]) -> tuple[dict[Label,int], dict[CONSTREF, int], dict[str, Any]]: +def make_symbol_map( + assembly: list[AssemblyInstruction], +) -> tuple[dict[Label, int], dict[CONSTREF, int], dict[str, Any]]: """ Construct symbol map from assembly list @@ -1378,7 +1384,11 @@ def make_symbol_map(assembly: 
list[AssemblyInstruction]) -> tuple[dict[Label,int return symbol_map, const_map, source_map -def _assembly_to_evm(assembly: list[AssemblyInstruction], symbol_map: dict[Label, int], const_map: dict[CONSTREF, int]) -> bytes: +def _assembly_to_evm( + assembly: list[AssemblyInstruction], + symbol_map: dict[Label, int], + const_map: dict[CONSTREF, int], +) -> bytes: """ Assembles assembly into EVM bytecode From 05cfb9de45d7b6af6bb4458ab1928df190d58fae Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 16:23:25 +0200 Subject: [PATCH 039/172] wip lint --- vyper/evm/opcodes.py | 9 +++-- vyper/ir/compile_ir.py | 77 +++++++++++++++++++++++++++--------------- 2 files changed, 55 insertions(+), 31 deletions(-) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 3049d7f911..3c6a80d33e 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -220,9 +220,12 @@ def _gas(value: OpcodeValue, idx: int) -> Optional[OpcodeRulesetValue]: def _mk_version_opcodes(opcodes: OpcodeMap, idx: int) -> OpcodeRulesetMap: - return dict( - (k, _gas(v, idx)) for k, v in opcodes.items() if _gas(v, idx) is not None # type: ignore - ) + ret = {} + for k, v in opcodes.items(): + gas = _gas(v, idx) + if gas is not None: + ret[k] = gas + return ret _evm_opcodes: Dict[int, OpcodeRulesetMap] = { diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index aa6720cd04..278cbbfb3a 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -290,7 +290,7 @@ def compile_to_assembly(code, optimize=OptimizationLevel.GAS, compiler_metadata= AssemblyInstruction = ( - str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader + str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader | CONST ) @@ -298,8 +298,8 @@ class _IRnodeLowerer: # map from variable names to height in stack withargs: dict[str, int] - # set of all existing labels - existing_labels: set[Label] + # set of all existing labels in the IRnodes + 
existing_labels: set[str] # break destination when inside loops # continue_dest, break_dest, height @@ -309,7 +309,7 @@ class _IRnodeLowerer: height: int code_instructions: list[AssemblyInstruction] - data_segments: list[AssemblyInstruction] + data_segments: list[list[AssemblyInstruction]] optimize: OptimizationLevel @@ -365,6 +365,7 @@ def _data_ofst_of( return [PUSH_OFST(symbol, ofst.value)] # if we can't resolve at compile time, resolve at runtime + pushsym: PUSHLABEL | PUSH_OFST if isinstance(symbol, Label): pushsym = PUSHLABEL(symbol) else: @@ -373,8 +374,8 @@ def _data_ofst_of( # we don't have a PUSHCONST instruction, use PUSH_OFST with ofst of 0 pushsym = PUSH_OFST(symbol, 0) - ofst = self._compile_r(ofst, height) - return ofst + [pushsym, "ADD"] + ofst_asm = self._compile_r(ofst, height) + return ofst_asm + [pushsym, "ADD"] def _compile_r(self, code: IRnode, height: int) -> list[AssemblyInstruction]: asm = self._step_r(code, height) @@ -414,12 +415,14 @@ def _height_of(varname): # Setting variables connected to with statements if code.value == "set": - if len(code.args) != 2 or code.args[0].value not in self.withargs: + varname = code.args[0].value + assert isinstance(varname, str) + if len(code.args) != 2 or varname not in self.withargs: raise Exception("Set expects two arguments, the first being a stack variable") # TODO: use _height_of - if height - self.withargs[code.args[0].value] > 16: + if height - self.withargs[varname] > 16: raise Exception("With statement too deep") - swap_instr = "SWAP" + str(height - self.withargs[code.args[0].value]) + swap_instr = "SWAP" + str(height - self.withargs[varname]) return self._compile_r(code.args[1], height) + [swap_instr, "POP"] # Pass statements @@ -524,6 +527,8 @@ def _height_of(varname): rounds_bound = code.args[3] body = code.args[4] + assert isinstance(i_name.value, str) # help mypy + entry_dest = self.mksymbol("loop_start") continue_dest = self.mksymbol("loop_continue") exit_dest = self.mksymbol("loop_exit") 
@@ -615,19 +620,22 @@ def _height_of(varname): # With statements if code.value == "with": + varname = code.args[0].value + assert isinstance(varname, str) + o = [] o.extend(self._compile_r(code.args[1], height)) - old = self.withargs.get(code.args[0].value, None) - self.withargs[code.args[0].value] = height + old = self.withargs.get(varname, None) + self.withargs[varname] = height o.extend(self._compile_r(code.args[2], height + 1)) if code.args[2].valency: o.extend(["SWAP1", "POP"]) else: o.extend(["POP"]) if old is not None: - self.withargs[code.args[0].value] = old + self.withargs[varname] = old else: - del self.withargs[code.args[0].value] + del self.withargs[varname] return o # runtime statement (used to deploy runtime code) @@ -698,7 +706,7 @@ def _height_of(varname): suffix_len = len(bytecode_suffix) + 2 bytecode_suffix += suffix_len.to_bytes(2, "big") - segment = [DataHeader(Label("cbor_metadata"))] + segment: list[AssemblyInstruction] = [DataHeader(Label("cbor_metadata"))] segment.append(DATA_ITEM(bytecode_suffix)) self.data_segments.append(segment) @@ -812,20 +820,23 @@ def _height_of(varname): return self._compile_r(expanded_ir, height) if code.value == "data": - data_node = [DataHeader(Label(code.args[0].value))] + assert isinstance(code.args[0].value, str) # help mypy + + data_header = DataHeader(Label(code.args[0].value)) + data_items = [] for c in code.args[1:]: if isinstance(c.value, bytes): - data_node.append(DATA_ITEM(c.value)) + data_items.append(DATA_ITEM(c.value)) elif isinstance(c, IRnode): assert c.value == "symbol" assert len(c.args) == 1 assert isinstance(c.args[0].value, str), (type(c.args[0].value), c) - data_node.append(DATA_ITEM(Label(c.args[0].value))) + data_items.append(DATA_ITEM(Label(c.args[0].value))) else: # pragma: nocover raise ValueError(f"Invalid data: {type(c)} {c}") - self.data_segments.append(data_node) + self.data_segments.append([data_header, *data_items]) return [] # jump to a symbol, and push variable # of arguments 
onto stack @@ -833,7 +844,9 @@ def _height_of(varname): o = [] for i, c in enumerate(reversed(code.args[1:])): o.extend(self._compile_r(c, height + i)) - o.extend([*JUMP(Label(code.args[0].value))]) + target = code.args[0].value + assert isinstance(target, str) # help mypy + o.extend([*JUMP(Label(target))]) return o if code.value == "djump": @@ -845,7 +858,9 @@ def _height_of(varname): return o # push a literal symbol if code.value == "symbol": - return [PUSHLABEL(Label(code.args[0].value))] + label = code.args[0].value + assert isinstance(label, str) + return [PUSHLABEL(Label(label))] # set a symbol as a location. if code.value == "label": @@ -1292,7 +1307,7 @@ def make_symbol_map( const_map: dict from CONSTREFs to values source_map: source map dict that gets output for the user """ - source_map = { + source_map: dict[str, Any] = { "breakpoints": OrderedSet(), "pc_breakpoints": OrderedSet(), "pc_jump_map": {0: "-"}, @@ -1350,17 +1365,18 @@ def make_symbol_map( elif isinstance(item, PUSHLABEL): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits - elif is_ofst(item): - assert isinstance(item.label, (Label, CONSTREF)) + elif isinstance(item, PUSH_OFST): assert isinstance(item.ofst, int), item # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar) # [PUSH_OFST, _mem_foo, bar] -> PUSHN (foo+bar) - if is_symbol(item.label): + if isinstance(item.label, Label): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits - else: + elif isinstance(item.label, CONSTREF): const = const_map[item.label] val = const + item.ofst pc += calc_push_size(val) + else: # pragma: nocover + raise CompilerPanic(f"invalid ofst {item.label}") elif isinstance(item, DATA_ITEM): if isinstance(item.data, Label): @@ -1423,9 +1439,11 @@ def _assembly_to_evm( ret.extend(bytecode) elif isinstance(item, Label): - ret.append(get_opcodes()["JUMPDEST"][0]) + jumpdest_opcode = get_opcodes()["JUMPDEST"][0] + assert jumpdest_opcode is not None # help mypy + ret.append(jumpdest_opcode) - elif is_ofst(item): + elif 
isinstance(item, PUSH_OFST): # PUSH_OFST (LABEL foo) 32 # PUSH_OFST (const foo) 32 if isinstance(item.label, Label): @@ -1441,7 +1459,10 @@ def _assembly_to_evm( elif isinstance(item, int): ret.append(item) elif isinstance(item, str) and item.upper() in get_opcodes(): - ret.append(get_opcodes()[item.upper()][0]) + opcode = get_opcodes()[item.upper()][0] + # TODO: fix signature of get_opcodes() + assert opcode is not None # help mypy + ret.append(opcode) elif isinstance(item, DATA_ITEM): ret.extend(_compile_data_item(item, symbol_map)) elif item[:4] == "PUSH": From 05251b4f7eeb3faf326b1baf999d2325311c3cd4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 17:55:49 +0200 Subject: [PATCH 040/172] update docstring --- vyper/ir/compile_ir.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 278cbbfb3a..895b1d2d9b 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -277,7 +277,18 @@ def __init__(self, sstr, ast_source=None, error_msg=None): # external entry point to `IRnode.compile_to_assembly()` -def compile_to_assembly(code, optimize=OptimizationLevel.GAS, compiler_metadata=None): +def compile_to_assembly(code: IRnode, optimize: OptimizationLevel=OptimizationLevel.GAS, compiler_metadata: Optional[Any]=None): + """ + Parameters: + code: IRnode to compile + optimize: Optimization level + compiler_metadata: + any compiler metadata to add as the final data segment. pass + `None` to indicate no metadata to be added (should always + be `None` for runtime code). the value is opaque, and will be + passed directly to `cbor2.dumps()`. + """ + # don't mutate the ir since the original might need to be output, e.g. 
`-f ir,asm` code = copy.deepcopy(code) _rewrite_return_sequences(code) @@ -1408,15 +1419,10 @@ def _assembly_to_evm( """ Assembles assembly into EVM bytecode - assembly: list of asm instructions - symbol_map: dict from labels to resolved locations in the code - const_map: dict from constrefs to their values - - TODO: move this - compiler_metadata: any compiler metadata to add. pass `None` to indicate - no metadata to be added (should always be `None` for - runtime code). the value is opaque, and will be passed - directly to `cbor2.dumps()`. + Parameters: + assembly: list of asm instructions + symbol_map: dict from labels to resolved locations in the code + const_map: dict from constrefs to their values Returns: bytes representing the bytecode """ From 292276e26e27e4f2208a15fe7b1df601553e6584 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 18:03:38 +0200 Subject: [PATCH 041/172] add source_map to CompilerData --- vyper/compiler/output.py | 12 ++++++------ vyper/compiler/phases.py | 26 ++++++++++++++++++++++---- vyper/ir/compile_ir.py | 8 ++++++-- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 2be3d067de..fe6eba6507 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -386,15 +386,15 @@ def _build_source_map_output(compiler_data, bytecode, pc_maps): def build_source_map_output(compiler_data: CompilerData) -> dict: - bytecode, pc_maps = compile_ir.assembly_to_evm(compiler_data.assembly, compiler_metadata=None) - return _build_source_map_output(compiler_data, bytecode, pc_maps) + bytecode = compiler_data.bytecode + source_map = compiler_data.source_map + return _build_source_map_output(compiler_data, bytecode, source_map) def build_source_map_runtime_output(compiler_data: CompilerData) -> dict: - bytecode, pc_maps = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, compiler_metadata=None - ) - return _build_source_map_output(compiler_data, 
bytecode, pc_maps) + bytecode = compiler_data.bytecode_runtime + source_map = compiler_data.source_map_runtime + return _build_source_map_output(compiler_data, bytecode, source_map) # generate a solidity-style source map. this functionality is deprecated diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 2fa294def8..49237cc668 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -282,13 +282,29 @@ def assembly_runtime(self) -> list: return generate_assembly(self.ir_runtime, self.settings.optimize) @cached_property - def bytecode(self) -> bytes: + def _bytecode(self) -> tuple[bytes, dict[str, Any]]: return generate_bytecode(self.assembly) + @property + def bytecode(self) -> bytes: + return self._bytecode[0] + + @property + def source_map(self) -> dict[str, Any]: + return self._bytecode[1] + @cached_property - def bytecode_runtime(self) -> bytes: + def _bytecode_runtime(self) -> tuple[bytes, dict[str, Any]]: return generate_bytecode(self.assembly_runtime) + @property + def bytecode_runtime(self) -> bytes: + return self._bytecode_runtime[0] + + @property + def source_map_runtime(self) -> dict[str, Any]: + return self._bytecode_runtime[1] + @cached_property def blueprint_bytecode(self) -> bytes: blueprint_bytecode = ERC5202_PREFIX + self.bytecode @@ -372,7 +388,7 @@ def _find_nested_opcode(assembly, key): return any(_find_nested_opcode(x, key) for x in sublists) -def generate_bytecode(assembly: list) -> bytes: +def generate_bytecode(assembly: list) -> tuple[bytes, dict[str, Any]]: """ Generate bytecode from assembly instructions. @@ -385,5 +401,7 @@ def generate_bytecode(assembly: list) -> bytes: ------- bytes Final compiled bytecode. 
+ dict + Source map """ - return compile_ir.assembly_to_evm(assembly)[0] + return compile_ir.assembly_to_evm(assembly) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 895b1d2d9b..37c05cbb19 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -3,7 +3,7 @@ import contextlib import copy from dataclasses import dataclass -from typing import Any, TypeVar +from typing import Any, Optional, TypeVar import cbor2 @@ -277,7 +277,11 @@ def __init__(self, sstr, ast_source=None, error_msg=None): # external entry point to `IRnode.compile_to_assembly()` -def compile_to_assembly(code: IRnode, optimize: OptimizationLevel=OptimizationLevel.GAS, compiler_metadata: Optional[Any]=None): +def compile_to_assembly( + code: IRnode, + optimize: OptimizationLevel = OptimizationLevel.GAS, + compiler_metadata: Optional[Any] = None, +): """ Parameters: code: IRnode to compile From cb8a36efb211a06998bfb2507a6444f4a083e55b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 18:07:17 +0200 Subject: [PATCH 042/172] rename generate_evm to generate_evm_assembly --- vyper/venom/__init__.py | 2 +- vyper/venom/venom_to_assembly.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 49ed345034..761b06dc1c 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -48,7 +48,7 @@ def generate_assembly_experimental( functions = [runtime_code] compiler = VenomCompiler(functions) - return compiler.generate_evm(optimize == OptimizationLevel.NONE) + return compiler.generate_evm_assembly(optimize == OptimizationLevel.NONE) def _run_passes(fn: IRFunction, optimize: OptimizationLevel, ac: IRAnalysesCache) -> None: diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 368ef5c521..7f70ceac6e 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -8,7 +8,6 @@ DataHeader, Label, TaggedInstruction, - is_mem_sym, 
optimize_assembly, ) from vyper.utils import MemoryPositions, OrderedSet, wrap256 @@ -157,7 +156,7 @@ def mklabel(self, name: str) -> Label: self.label_counter += 1 return f"{name}_{self.label_counter}" - def generate_evm(self, no_optimize: bool = False) -> list[str]: + def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstruction]: self.visited_basicblocks = OrderedSet() self.label_counter = 0 From 51a98ab623e2ea258895c1572b5702d12ad4b13d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 18:07:41 +0200 Subject: [PATCH 043/172] fix lint in venom_main --- vyper/cli/venom_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/cli/venom_main.py b/vyper/cli/venom_main.py index d6b7bcec50..d34f1ce41a 100755 --- a/vyper/cli/venom_main.py +++ b/vyper/cli/venom_main.py @@ -61,7 +61,7 @@ def _parse_args(argv: list[str]): run_passes_on(ctx, OptimizationLevel.default()) asm = generate_assembly_experimental(ctx) - bytecode = generate_bytecode(asm, compiler_metadata=None) + bytecode = generate_bytecode(asm) print(f"0x{bytecode.hex()}") From 69b7c107e4dc25d030a6122df47fb3779d9cff66 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 18:09:43 +0200 Subject: [PATCH 044/172] rename make_symbols to resolve_symbols --- vyper/ir/compile_ir.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 37c05cbb19..98e87b31e0 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1305,13 +1305,13 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st source_map: source map dict that gets output for the user """ # This API might seem a bit strange, but it's backwards compatible - symbol_map, const_map, source_map = make_symbol_map(assembly) + symbol_map, const_map, source_map = resolve_symbols(assembly) bytecode = _assembly_to_evm(assembly, symbol_map, const_map) return bytecode, source_map # 
resolve symbols in assembly -def make_symbol_map( +def resolve_symbols( assembly: list[AssemblyInstruction], ) -> tuple[dict[Label, int], dict[CONSTREF, int], dict[str, Any]]: """ From 4487bf849126e8927d270484b095850485900dcb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 12 May 2025 18:10:14 +0200 Subject: [PATCH 045/172] lint --- vyper/cli/venom_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/cli/venom_main.py b/vyper/cli/venom_main.py index d34f1ce41a..0ceeae73f6 100755 --- a/vyper/cli/venom_main.py +++ b/vyper/cli/venom_main.py @@ -61,7 +61,7 @@ def _parse_args(argv: list[str]): run_passes_on(ctx, OptimizationLevel.default()) asm = generate_assembly_experimental(ctx) - bytecode = generate_bytecode(asm) + bytecode, _ = generate_bytecode(asm) print(f"0x{bytecode.hex()}") From a96b8563ca2f28fbba313876e01ceac599e8598d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 13 May 2025 11:41:48 +0200 Subject: [PATCH 046/172] add dummy entry label --- vyper/compiler/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index fe6eba6507..5ad12748dc 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -327,7 +327,7 @@ def build_layout_output(compiler_data: CompilerData) -> StorageLayout: def _build_asm(asm_list): - output_string = "" + output_string = "__entry__:" in_push = 0 for item in asm_list: if isinstance(item, (compile_ir.Label, compile_ir.DataHeader)): From 04d96b3650971bd55208b03d0a6395a4c85e46b4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 13 May 2025 12:22:19 +0200 Subject: [PATCH 047/172] add todo --- vyper/ir/compile_ir.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 98e87b31e0..53ad495133 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1445,6 +1445,7 @@ def _assembly_to_evm( elif isinstance(item, PUSHLABEL): # push a symbol to 
stack label = item.label + # TODO: make _compile_push_instruction bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) ret.extend(bytecode) From 355b7b41c621040fa406dbc485d0ce7b10cc89cc Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 14 May 2025 13:44:11 +0200 Subject: [PATCH 048/172] add compile_push_instruction helper --- vyper/ir/compile_ir.py | 14 ++++++++++---- vyper/venom/venom_to_assembly.py | 2 ++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 53ad495133..bee0b37519 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1414,6 +1414,13 @@ def resolve_symbols( return symbol_map, const_map, source_map +# helper function +def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes: + push_mnemonic = assembly[0] + assert push_mnemonic.startswith("PUSH") + push_instr = PUSH_OFFSET + int(push_mnemonic[4:]) + assert all(isinstance(item, int) for item in assembly[1:]) + return bytes([push_instr, *assembly[1:]]) def _assembly_to_evm( assembly: list[AssemblyInstruction], @@ -1445,8 +1452,7 @@ def _assembly_to_evm( elif isinstance(item, PUSHLABEL): # push a symbol to stack label = item.label - # TODO: make _compile_push_instruction - bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) + bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) ret.extend(bytecode) elif isinstance(item, Label): @@ -1459,11 +1465,11 @@ def _assembly_to_evm( # PUSH_OFST (const foo) 32 if isinstance(item.label, Label): ofst = symbol_map[item.label] + item.ofst - bytecode, _ = assembly_to_evm(PUSH_N(ofst, SYMBOL_SIZE)) + bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) else: assert isinstance(item.label, CONSTREF) ofst = const_map[item.label] + item.ofst - bytecode, _ = assembly_to_evm(PUSH(ofst)) + bytecode = _compile_push_instruction(PUSH(ofst)) ret.extend(bytecode) diff --git 
a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 7f70ceac6e..f4c2f03fe5 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any, Iterable from vyper.exceptions import CompilerPanic, StackTooDeep From 73a1e69510845361fab2f65f521e6d752073e06d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 14 May 2025 13:46:57 +0200 Subject: [PATCH 049/172] remove find_nested_opcode helper --- vyper/compiler/phases.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 49237cc668..130f9254e7 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -370,7 +370,7 @@ def generate_assembly( ir_nodes, optimize=optimize, compiler_metadata=compiler_metadata ) - if _find_nested_opcode(assembly, "DEBUG"): + if "DEBUG" in assembly: vyper_warn( VyperWarning( "This code contains DEBUG opcodes! The DEBUG opcode will only work in " @@ -380,14 +380,6 @@ def generate_assembly( return assembly -def _find_nested_opcode(assembly, key): - if key in assembly: - return True - else: - sublists = [sub for sub in assembly if isinstance(sub, list)] - return any(_find_nested_opcode(x, key) for x in sublists) - - def generate_bytecode(assembly: list) -> tuple[bytes, dict[str, Any]]: """ Generate bytecode from assembly instructions. 
From dc2b366685aacfdc3e9f12b7d76164319fa40ca9 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 23 May 2025 17:02:34 +0300 Subject: [PATCH 050/172] start working on venom asm --- vyper/compiler/phases.py | 24 ++++++++++++++---------- vyper/venom/__init__.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 39f916ec1e..9cf5cfee02 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -25,7 +25,7 @@ from vyper.semantics.types.module import ModuleT from vyper.typing import StorageLayout from vyper.utils import ERC5202_PREFIX, sha256sum -from vyper.venom import generate_assembly_experimental, generate_ir +from vyper.venom import generate_assembly_experimental, generate_venom from vyper.warnings import VyperWarning, vyper_warn DEFAULT_CONTRACT_PATH = PurePath("VyperContract.vy") @@ -255,11 +255,17 @@ def function_signatures(self) -> dict[str, ContractFunctionT]: return {f.name: f._metadata["func_type"] for f in fs} @cached_property - def venom_functions(self): - deploy_ir, runtime_ir = self._ir_output - deploy_venom = generate_ir(deploy_ir, self.settings) - runtime_venom = generate_ir(runtime_ir, self.settings) - return deploy_venom, runtime_venom + def venom_runtime(self): + runtime_venom = generate_ir(self.ir_runtime, self.settings) + return runtime_venom + + @cached_property + def venom_deploytime(self): + runtime_asm = self.assembly_runtime + runtime_bytecode = self.bytecode_runtime + runtime_data_segment_lengths = get_data_segment_lengths(runtime_asm) + + venom_ctx = generate_ir(self.ir_nodes, self.settings) @cached_property def assembly(self) -> list: @@ -268,10 +274,9 @@ def assembly(self) -> list: metadata = bytes.fromhex(self.integrity_sum) if self.settings.experimental_codegen: - deploy_code, runtime_code = self.venom_functions assert self.settings.optimize is not None # mypy hint return generate_assembly_experimental( - runtime_code, deploy_code=deploy_code, 
optimize=self.settings.optimize + self.venom_deploytime, optimize=self.settings.optimize ) else: return generate_assembly( @@ -281,9 +286,8 @@ def assembly(self) -> list: @cached_property def assembly_runtime(self) -> list: if self.settings.experimental_codegen: - _, runtime_code = self.venom_functions assert self.settings.optimize is not None # mypy hint - return generate_assembly_experimental(runtime_code, optimize=self.settings.optimize) + return generate_assembly_experimental(self.venom_runtime, optimize=self.settings.optimize) else: return generate_assembly(self.ir_runtime, self.settings.optimize) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 8564de0a69..8309140618 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -126,7 +126,7 @@ def run_passes_on(ctx: IRContext, optimize: OptimizationLevel) -> None: _run_passes(fn, optimize, ir_analyses[fn]) -def generate_ir(ir: IRnode, settings: Settings) -> IRContext: +def generate_venom(ir: IRnode, settings: Settings) -> IRContext: # Convert "old" IR to "new" IR ctx = ir_node_to_venom(ir) From e3a57accdbdc46b271be627de53689be68e3333f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 23 May 2025 17:09:38 +0300 Subject: [PATCH 051/172] add a comment --- vyper/compiler/phases.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 9cf5cfee02..7e3b9f0a61 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -265,6 +265,7 @@ def venom_deploytime(self): runtime_bytecode = self.bytecode_runtime runtime_data_segment_lengths = get_data_segment_lengths(runtime_asm) + # TODO: inject the data segments and constants into venom_ctx. 
venom_ctx = generate_ir(self.ir_nodes, self.settings) @cached_property From a0d0651e04e397df99a63bf9f3ec4afcc13405f7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 24 May 2025 15:15:16 +0300 Subject: [PATCH 052/172] wip: remove special "exit" instruction --- vyper/compiler/output.py | 8 ++--- vyper/compiler/phases.py | 39 ++++++++++++++++++---- vyper/ir/compile_ir.py | 55 +++++++++++++++++++++----------- vyper/venom/__init__.py | 23 ++++++++++--- vyper/venom/basicblock.py | 6 +--- vyper/venom/context.py | 5 +-- vyper/venom/ir_node_to_venom.py | 23 +++++++++---- vyper/venom/memory_location.py | 2 +- vyper/venom/venom_to_assembly.py | 48 ++++++++-------------------- 9 files changed, 125 insertions(+), 84 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 5ad12748dc..569d72ac6d 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -166,19 +166,19 @@ def build_interface_output(compiler_data: CompilerData) -> str: def build_bb_output(compiler_data: CompilerData) -> IRnode: - return compiler_data.venom_functions[0] + return compiler_data.venom_deploytime def build_bb_runtime_output(compiler_data: CompilerData) -> IRnode: - return compiler_data.venom_functions[1] + return compiler_data.venom_runtime def build_cfg_output(compiler_data: CompilerData) -> str: - return compiler_data.venom_functions[0].as_graph() + return compiler_data.venom_deploytime.as_graph() def build_cfg_runtime_output(compiler_data: CompilerData) -> str: - return compiler_data.venom_functions[1].as_graph() + return compiler_data.venom_runtime.as_graph() def build_ir_output(compiler_data: CompilerData) -> IRnode: diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 7e3b9f0a61..c7046ca1eb 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -256,17 +256,24 @@ def function_signatures(self) -> dict[str, ContractFunctionT]: @cached_property def venom_runtime(self): - runtime_venom = generate_ir(self.ir_runtime, 
self.settings) + runtime_venom = generate_venom(self.ir_runtime, self.settings) return runtime_venom @cached_property def venom_deploytime(self): - runtime_asm = self.assembly_runtime - runtime_bytecode = self.bytecode_runtime - runtime_data_segment_lengths = get_data_segment_lengths(runtime_asm) + data_sections = {"runtime_begin": self.bytecode_runtime} + if self.bytecode_metadata is not None: + data_sections["cbor_metadata"] = self.bytecode_metadata + + constants = { + "runtime_codesize": len(self.bytecode_runtime), + "immutables_len": self.compilation_target._metadata["type"].immutable_section_bytes, + } - # TODO: inject the data segments and constants into venom_ctx. - venom_ctx = generate_ir(self.ir_nodes, self.settings) + venom_ctx = generate_venom( + self.ir_nodes, self.settings, constants=constants, data_sections=data_sections + ) + return venom_ctx @cached_property def assembly(self) -> list: @@ -284,11 +291,29 @@ def assembly(self) -> list: self.ir_nodes, self.settings.optimize, compiler_metadata=metadata ) + @cached_property + def bytecode_metadata(self) -> Optional[bytes]: + if self.no_bytecode_metadata: + return None + + runtime_asm = self.assembly_runtime + runtime_data_segment_lengths = compile_ir.get_data_segment_lengths(runtime_asm) + + immutables_len = self.compilation_target._metadata["type"].immutable_section_bytes + runtime_codesize = len(self.bytecode_runtime) + + metadata = self.integrity_sum + return compile_ir.generate_cbor_metadata( + metadata, runtime_codesize, runtime_data_segment_lengths, immutables_len + ) + @cached_property def assembly_runtime(self) -> list: if self.settings.experimental_codegen: assert self.settings.optimize is not None # mypy hint - return generate_assembly_experimental(self.venom_runtime, optimize=self.settings.optimize) + return generate_assembly_experimental( + self.venom_runtime, optimize=self.settings.optimize + ) else: return generate_assembly(self.ir_runtime, self.settings.optimize) diff --git 
a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index bee0b37519..4258035f10 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -113,7 +113,7 @@ def __eq__(self, other): class PUSHLABEL: def __init__(self, label: Label): - assert isinstance(label, Label) + assert isinstance(label, Label), label self.label = label def __repr__(self): @@ -186,6 +186,28 @@ def is_ofst(assembly_item): return isinstance(assembly_item, PUSH_OFST) +def generate_cbor_metadata( + compiler_metadata: Any, + runtime_codesize: int, + runtime_data_segment_lengths: list[int], + immutables_len: int, +) -> bytes: + metadata = ( + compiler_metadata, + runtime_codesize, + runtime_data_segment_lengths, + immutables_len, + {"vyper": version_tuple}, + ) + ret = cbor2.dumps(metadata) + # append the length of the footer, *including* the length + # of the length bytes themselves. + suffix_len = len(ret) + 2 + ret += suffix_len.to_bytes(2, "big") + + return ret + + def _runtime_code_offsets(ctor_mem_size, runtime_codelen): # we need two numbers to calculate where the runtime code # should be copied to in memory (and making sure we don't @@ -304,6 +326,8 @@ def compile_to_assembly( return res +# TODO: move all these assembly data structures to own module, like +# vyper.evm.assembly AssemblyInstruction = ( str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader | CONST ) @@ -690,11 +714,6 @@ def _height_of(varname): ] ) - # TODO: these two probably not needed - # o.append(CONST("ctor_mem_size", memsize)) - # o.append(CONST("immutables_len", immutables_len)) - - o.append(CONST("mem_deploy_start", mem_deploy_start)) o.append(CONST("mem_deploy_end", mem_deploy_end)) # calculate the len of runtime code + immutables size @@ -708,18 +727,12 @@ def _height_of(varname): if self.compiler_metadata is not None: # we should issue the cbor-encoded metadata. 
- metadata = ( + bytecode_suffix = generate_cbor_metadata( self.compiler_metadata, runtime_codesize, runtime_data_segment_lengths, immutables_len, - {"vyper": version_tuple}, ) - bytecode_suffix = cbor2.dumps(metadata) - # append the length of the footer, *including* the length - # of the length bytes themselves. - suffix_len = len(bytecode_suffix) + 2 - bytecode_suffix += suffix_len.to_bytes(2, "big") segment: list[AssemblyInstruction] = [DataHeader(Label("cbor_metadata"))] segment.append(DATA_ITEM(bytecode_suffix)) @@ -1252,7 +1265,7 @@ def optimize_assembly(assembly): # predict what length of an assembly [data] node will be in bytecode -def get_data_segment_lengths(assembly): +def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: ret = [] for item in assembly: if isinstance(item, DataHeader): @@ -1265,7 +1278,7 @@ def get_data_segment_lengths(assembly): if is_symbol(item.data): ret[-1] += SYMBOL_SIZE elif isinstance(item.data, bytes): - ret[-1] += len(item) + ret[-1] += len(item.data) else: # pragma: nocover raise ValueError(f"invalid data {type(item)} {item}") @@ -1414,13 +1427,19 @@ def resolve_symbols( return symbol_map, const_map, source_map + # helper function def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes: push_mnemonic = assembly[0] - assert push_mnemonic.startswith("PUSH") + assert isinstance(push_mnemonic, str) and push_mnemonic.startswith("PUSH") push_instr = PUSH_OFFSET + int(push_mnemonic[4:]) - assert all(isinstance(item, int) for item in assembly[1:]) - return bytes([push_instr, *assembly[1:]]) + ret = [push_instr] + + for item in assembly[1:]: + assert isinstance(item, int) + ret.append(item) + return bytes(ret) + def _assembly_to_evm( assembly: list[AssemblyInstruction], diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 8309140618..0f0b4f3b85 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -7,8 +7,10 @@ from vyper.compiler.settings import 
OptimizationLevel, Settings from vyper.evm.address_space import MEMORY, STORAGE, TRANSIENT from vyper.exceptions import CompilerPanic +from vyper.ir.compile_ir import AssemblyInstruction from vyper.venom.analysis import MemSSA from vyper.venom.analysis.analysis import IRAnalysesCache +from vyper.venom.basicblock import IRLabel, IRLiteral from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.ir_node_to_venom import ir_node_to_venom @@ -43,10 +45,10 @@ def generate_assembly_experimental( runtime_code: IRContext, deploy_code: Optional[IRContext] = None, optimize: OptimizationLevel = DEFAULT_OPT_LEVEL, -) -> list[str]: +) -> list[AssemblyInstruction]: # note: VenomCompiler is sensitive to the order of these! if deploy_code is not None: - functions = [deploy_code, runtime_code] + functions = [deploy_code] else: functions = [runtime_code] @@ -126,9 +128,22 @@ def run_passes_on(ctx: IRContext, optimize: OptimizationLevel) -> None: _run_passes(fn, optimize, ir_analyses[fn]) -def generate_venom(ir: IRnode, settings: Settings) -> IRContext: +def generate_venom( + ir: IRnode, + settings: Settings, + constants: dict[str, int] = None, + data_sections: dict[str, bytes] = None, +) -> IRContext: # Convert "old" IR to "new" IR - ctx = ir_node_to_venom(ir) + starting_symbols = None + if constants is not None: + starting_symbols = {k: IRLiteral(v) for k, v in constants.items()} + ctx = ir_node_to_venom(ir, starting_symbols) + + data_sections = data_sections or {} + for section_name, data in data_sections.items(): + ctx.append_data_section(IRLabel(section_name)) + ctx.append_data_item(data) optimize = settings.optimize assert optimize is not None # help mypy diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 3bae508616..d2c217f15c 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -14,9 +14,7 @@ from vyper.venom.function import IRFunction # instructions which can terminate a basic block 
-BB_TERMINATORS = frozenset( - ["jmp", "djmp", "jnz", "ret", "return", "revert", "stop", "exit", "sink"] -) +BB_TERMINATORS = frozenset(["jmp", "djmp", "jnz", "ret", "return", "revert", "stop", "sink"]) VOLATILE_INSTRUCTIONS = frozenset( [ @@ -51,7 +49,6 @@ "assert", "assert_unreachable", "stop", - "exit", ] ) @@ -80,7 +77,6 @@ "djmp", "jnz", "log", - "exit", "nop", ] ) diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 30fac4875d..d83421d0de 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -33,8 +33,6 @@ def __str__(self): class IRContext: functions: dict[IRLabel, IRFunction] entry_function: Optional[IRFunction] - ctor_mem_size: Optional[int] - immutables_len: Optional[int] data_segment: list[DataSection] last_label: int last_variable: int @@ -42,9 +40,8 @@ class IRContext: def __init__(self) -> None: self.functions = {} self.entry_function = None - self.ctor_mem_size = None - self.immutables_len = None self.data_segment = [] + self.last_label = 0 self.last_variable = 0 diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 65793ea5c0..6bbbe3179e 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -8,6 +8,7 @@ from vyper.codegen.context import Alloca from vyper.codegen.ir_node import IRnode from vyper.evm.opcodes import get_opcodes +from vyper.ir.compile_ir import _runtime_code_offsets from vyper.venom.basicblock import ( IRBasicBlock, IRInstruction, @@ -95,7 +96,6 @@ "selfdestruct", "assert", "assert_unreachable", - "exit", "calldatacopy", "mcopy", "extcodecopy", @@ -129,7 +129,7 @@ def get_scratch_alloca_id() -> int: # convert IRnode directly to venom -def ir_node_to_venom(ir: IRnode) -> IRContext: +def ir_node_to_venom(ir: IRnode, symbols: Optional[dict] = None) -> IRContext: _ = ir.unique_symbols # run unique symbols check global _alloca_table, _callsites @@ -140,7 +140,8 @@ def ir_node_to_venom(ir: IRnode) -> IRContext: fn = 
ctx.create_function(MAIN_ENTRY_LABEL_NAME) ctx.entry_function = fn - _convert_ir_bb(fn, ir, {}) + symbols = symbols or {} + _convert_ir_bb(fn, ir, symbols) for fn in ctx.functions.values(): for bb in fn.get_basic_blocks(): @@ -410,9 +411,19 @@ def _convert_ir_bb(fn, ir, symbols): "return", IRVariable("ret_size"), IRVariable("ret_ofst") ) elif ir.value == "deploy": - ctx.ctor_mem_size = ir.args[0].value - ctx.immutables_len = ir.args[2].value - fn.get_basic_block().append_instruction("exit") + ctor_mem_size = ir.args[0].value + immutables_len = ir.args[2].value + runtime_codesize = symbols["runtime_codesize"].value + assert immutables_len == symbols["immutables_len"].value # sanity + + mem_deploy_start, mem_deploy_end = _runtime_code_offsets(ctor_mem_size, runtime_codesize) + bb = fn.get_basic_block() + + bb.append_instruction( + "codecopy", mem_deploy_start, IRLabel("runtime_begin"), runtime_codesize + ) + amount_to_return = bb.append_instruction("add", runtime_codesize, immutables_len) + bb.append_instruction("return", mem_deploy_start, amount_to_return) return None elif ir.value == "seq": if len(ir.args) == 0: diff --git a/vyper/venom/memory_location.py b/vyper/venom/memory_location.py index 977c8a1c76..bf7d1fc8d7 100644 --- a/vyper/venom/memory_location.py +++ b/vyper/venom/memory_location.py @@ -251,7 +251,7 @@ def _get_storage_read_location(inst, addr_space: AddrSpace) -> MemoryLocation: return MemoryLocation.UNDEFINED elif opcode in ("create", "create2"): return MemoryLocation.UNDEFINED - elif opcode in ("return", "stop", "exit", "sink"): + elif opcode in ("return", "stop", "sink"): # these opcodes terminate execution and commit to (persistent) # storage, resulting in storage writes escaping our control. 
# returning `MemoryLocation.UNDEFINED` represents "future" reads diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index f4c2f03fe5..df8799430b 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -4,9 +4,11 @@ from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.ir.compile_ir import ( + DATA_ITEM, PUSH, PUSH_OFST, PUSHLABEL, + AssemblyInstruction, DataHeader, Label, TaggedInstruction, @@ -128,7 +130,7 @@ def _as_asm_symbol(label: IRLabel) -> Label: return Label(label.value) -def _ofst(label: str | Label, value: int) -> list[Any]: +def _ofst(label: Label, value: int) -> list[Any]: # resolve at compile time using magic PUSH_OFST op return [PUSH_OFST(label, value)] @@ -156,14 +158,13 @@ def __init__(self, ctxs: list[IRContext]): def mklabel(self, name: str) -> Label: self.label_counter += 1 - return f"{name}_{self.label_counter}" + return Label(f"{name}_{self.label_counter}") def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstruction]: self.visited_basicblocks = OrderedSet() self.label_counter = 0 - asm: list[Any] = [] - top_asm = asm + asm: list[AssemblyInstruction] = [] for ctx in self.ctxs: for fn in ctx.functions.values(): @@ -178,48 +179,27 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) - # TODO make this property on IRFunction - asm.extend([Label("ctor_exit")]) - if ctx.immutables_len is not None and ctx.ctor_mem_size is not None: - asm.extend( - [ - PUSHLABEL(Label("subcode_size")), - PUSHLABEL(Label("runtime_begin")), - "_mem_deploy_start", - "CODECOPY", - ] - ) - asm.extend(_ofst(Label("subcode_size"), ctx.immutables_len)) # stack: len - asm.extend(["_mem_deploy_start"]) # stack: len mem_ofst - asm.extend(["RETURN"]) - asm.extend(_REVERT_POSTAMBLE) - runtime_asm = [ - RuntimeHeader(Label("runtime_begin"), ctx.ctor_mem_size, ctx.immutables_len) - ] - 
asm.append(runtime_asm) - asm = runtime_asm - else: - asm.extend(_REVERT_POSTAMBLE) + asm.extend(_REVERT_POSTAMBLE) # Append data segment for data_section in ctx.data_segment: label = data_section.label - asm_data_section: list[Any] = [] + asm_data_section: list[AssemblyInstruction] = [] asm_data_section.append(DataHeader(_as_asm_symbol(label))) for item in data_section.data_items: data = item.data if isinstance(data, IRLabel): - asm_data_section.append(_as_asm_symbol(data)) + asm_data_section.append(DATA_ITEM(_as_asm_symbol(data))) else: assert isinstance(data, bytes) - asm_data_section.append(data) + asm_data_section.append(DATA_ITEM(data)) - asm.append(asm_data_section) + asm.extend(asm_data_section) if no_optimize is False: - optimize_assembly(top_asm) + optimize_assembly(asm) - return top_asm + return asm def _stack_reorder( self, assembly: list, stack: StackModel, stack_ops: list[IROperand], dry_run: bool = False @@ -407,7 +387,7 @@ def clean_stack_from_cfg_in( def _generate_evm_for_instruction( self, inst: IRInstruction, stack: StackModel, next_liveness: OrderedSet ) -> list[str]: - assembly: list[str | int] = [] + assembly: list[AssemblyInstruction] = [] opcode = inst.opcode # @@ -564,8 +544,6 @@ def _generate_evm_for_instruction( assembly.append("JUMP") elif opcode == "return": assembly.append("RETURN") - elif opcode == "exit": - assembly.extend([PUSHLABEL(Label("ctor_exit")), "JUMP"]) elif opcode == "phi": pass elif opcode == "sha3": From fa443a484d1f8a8a372e740197df5500b87b4e8a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 24 May 2025 15:39:37 +0300 Subject: [PATCH 053/172] add constants to venom context --- vyper/venom/__init__.py | 20 ++++------- vyper/venom/context.py | 6 ++++ vyper/venom/ir_node_to_venom.py | 7 ++-- vyper/venom/venom_to_assembly.py | 62 +++++++++++++++++--------------- 4 files changed, 51 insertions(+), 44 deletions(-) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 0f0b4f3b85..42ae08a9d4 100644 --- 
a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -42,17 +42,9 @@ def generate_assembly_experimental( - runtime_code: IRContext, - deploy_code: Optional[IRContext] = None, - optimize: OptimizationLevel = DEFAULT_OPT_LEVEL, + venom_ctx: IRContext, optimize: OptimizationLevel = DEFAULT_OPT_LEVEL ) -> list[AssemblyInstruction]: - # note: VenomCompiler is sensitive to the order of these! - if deploy_code is not None: - functions = [deploy_code] - else: - functions = [runtime_code] - - compiler = VenomCompiler(functions) + compiler = VenomCompiler(venom_ctx) return compiler.generate_evm_assembly(optimize == OptimizationLevel.NONE) @@ -135,9 +127,8 @@ def generate_venom( data_sections: dict[str, bytes] = None, ) -> IRContext: # Convert "old" IR to "new" IR - starting_symbols = None - if constants is not None: - starting_symbols = {k: IRLiteral(v) for k, v in constants.items()} + constants = constants or {} + starting_symbols = {k: IRLiteral(v) for k, v in constants.items()} ctx = ir_node_to_venom(ir, starting_symbols) data_sections = data_sections or {} @@ -145,6 +136,9 @@ def generate_venom( ctx.append_data_section(IRLabel(section_name)) ctx.append_data_item(data) + for constname, value in constants.items(): + ctx.add_constant(constname, value) + optimize = settings.optimize assert optimize is not None # help mypy run_passes_on(ctx, optimize) diff --git a/vyper/venom/context.py b/vyper/venom/context.py index d83421d0de..f50dc1220f 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -33,6 +33,7 @@ def __str__(self): class IRContext: functions: dict[IRLabel, IRFunction] entry_function: Optional[IRFunction] + constants: dict[str, int] # globally defined constants data_segment: list[DataSection] last_label: int last_variable: int @@ -41,6 +42,7 @@ def __init__(self) -> None: self.functions = {} self.entry_function = None self.data_segment = [] + self.constants = {} self.last_label = 0 self.last_variable = 0 @@ -96,6 +98,10 @@ def 
append_data_item(self, data: IRLabel | bytes) -> None: data_section = self.data_segment[-1] data_section.data_items.append(DataItem(data)) + def add_constant(self, name: str, value: int) -> None: + assert name not in self.constants + self.constants[name] = value + def as_graph(self) -> str: s = ["digraph G {"] for fn in self.functions.values(): diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 6bbbe3179e..6f76ed0bdd 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -417,13 +417,16 @@ def _convert_ir_bb(fn, ir, symbols): assert immutables_len == symbols["immutables_len"].value # sanity mem_deploy_start, mem_deploy_end = _runtime_code_offsets(ctor_mem_size, runtime_codesize) + + fn.ctx.add_constant("mem_deploy_end", mem_deploy_end) + bb = fn.get_basic_block() bb.append_instruction( - "codecopy", mem_deploy_start, IRLabel("runtime_begin"), runtime_codesize + "codecopy", runtime_codesize, IRLabel("runtime_begin"), mem_deploy_start ) amount_to_return = bb.append_instruction("add", runtime_codesize, immutables_len) - bb.append_instruction("return", mem_deploy_start, amount_to_return) + bb.append_instruction("return", amount_to_return, mem_deploy_start) return None elif ir.value == "seq": if len(ir.args) == 0: diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index df8799430b..823f346ce1 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -151,8 +151,9 @@ class VenomCompiler: dfg: DFGAnalysis cfg: CFGAnalysis - def __init__(self, ctxs: list[IRContext]): - self.ctxs = ctxs + def __init__(self, ctx: IRContext): + # TODO: maybe just accept a single IRContext + self.ctx = ctx self.label_counter = 0 self.visited_basicblocks = OrderedSet() @@ -166,35 +167,34 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr asm: list[AssemblyInstruction] = [] - for ctx in self.ctxs: - for fn in ctx.functions.values(): - ac = 
IRAnalysesCache(fn) + for fn in self.ctx.functions.values(): + ac = IRAnalysesCache(fn) - NormalizationPass(ac, fn).run_pass() - self.liveness = ac.request_analysis(LivenessAnalysis) - self.dfg = ac.request_analysis(DFGAnalysis) - self.cfg = ac.request_analysis(CFGAnalysis) + NormalizationPass(ac, fn).run_pass() + self.liveness = ac.request_analysis(LivenessAnalysis) + self.dfg = ac.request_analysis(DFGAnalysis) + self.cfg = ac.request_analysis(CFGAnalysis) - assert self.cfg.is_normalized(), "Non-normalized CFG!" + assert self.cfg.is_normalized(), "Non-normalized CFG!" - self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) + self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) - asm.extend(_REVERT_POSTAMBLE) + asm.extend(_REVERT_POSTAMBLE) - # Append data segment - for data_section in ctx.data_segment: - label = data_section.label - asm_data_section: list[AssemblyInstruction] = [] - asm_data_section.append(DataHeader(_as_asm_symbol(label))) - for item in data_section.data_items: - data = item.data - if isinstance(data, IRLabel): - asm_data_section.append(DATA_ITEM(_as_asm_symbol(data))) - else: - assert isinstance(data, bytes) - asm_data_section.append(DATA_ITEM(data)) + # Append data segment + for data_section in self.ctx.data_segment: + label = data_section.label + asm_data_section: list[AssemblyInstruction] = [] + asm_data_section.append(DataHeader(_as_asm_symbol(label))) + for item in data_section.data_items: + data = item.data + if isinstance(data, IRLabel): + asm_data_section.append(DATA_ITEM(_as_asm_symbol(data))) + else: + assert isinstance(data, bytes) + asm_data_section.append(DATA_ITEM(data)) - asm.extend(asm_data_section) + asm.extend(asm_data_section) if no_optimize is False: optimize_assembly(asm) @@ -567,17 +567,21 @@ def _generate_evm_for_instruction( assembly.extend([PUSHLABEL(end_symbol), "JUMPI", "INVALID", end_symbol]) elif opcode == "iload": addr = inst.operands[0] + mem_deploy_end = self.ctx.constants["mem_deploy_end"] 
if isinstance(addr, IRLiteral): - assembly.extend(_ofst("_mem_deploy_end", addr.value)) + ptr = mem_deploy_end + addr.value + assembly.extend(PUSH(ptr)) else: - assembly.extend(["_mem_deploy_end", "ADD"]) + assembly.extend([*PUSH(mem_deploy_end), "ADD"]) assembly.append("MLOAD") elif opcode == "istore": addr = inst.operands[1] + mem_deploy_end = self.ctx.constants["mem_deploy_end"] if isinstance(addr, IRLiteral): - assembly.extend(_ofst("_mem_deploy_end", addr.value)) + ptr = mem_deploy_end + addr.value + assembly.extend(PUSH(ptr)) else: - assembly.extend(["_mem_deploy_end", "ADD"]) + assembly.extend([*PUSH(mem_deploy_end), "ADD"]) assembly.append("MSTORE") elif opcode == "log": assembly.extend([f"LOG{log_topic_count}"]) From 318bcef5c0646e13fc94cde2692d5a48b956e60a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 24 May 2025 17:20:29 +0300 Subject: [PATCH 054/172] fix symbol map resolution --- vyper/ir/compile_ir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 4258035f10..aa59b4ed49 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1384,10 +1384,11 @@ def resolve_symbols( # update pc if isinstance(item, Label): - # Don't increment pc as the symbol itself doesn't go into code _add_to_symbol_map(symbol_map, item, pc) + pc += 1 # jumpdest elif isinstance(item, DataHeader): + # Don't increment pc as the symbol itself doesn't go into code _add_to_symbol_map(symbol_map, item.label, pc) elif isinstance(item, PUSHLABEL): From 3d834ecb97227127189de3809bbeeebe2325111d Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 24 May 2025 17:22:47 +0300 Subject: [PATCH 055/172] update tests --- tests/hevm.py | 4 ++-- tests/unit/compiler/venom/test_venom_to_assembly.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/hevm.py b/tests/hevm.py index da104db60a..f8d7f58a80 100644 --- a/tests/hevm.py +++ b/tests/hevm.py @@ -66,8 +66,8 @@ def 
_prep_hevm_venom_ctx(ctx, verbose=False): LowerDloadPass(ac, fn).run_pass() SingleUseExpansion(ac, fn).run_pass() - compiler = VenomCompiler([ctx]) - asm = compiler.generate_evm(no_optimize=False) + compiler = VenomCompiler(ctx) + asm = compiler.generate_evm_assembly(no_optimize=False) return assembly_to_evm(asm)[0].hex() diff --git a/tests/unit/compiler/venom/test_venom_to_assembly.py b/tests/unit/compiler/venom/test_venom_to_assembly.py index ba520c06d1..73162485b2 100644 --- a/tests/unit/compiler/venom/test_venom_to_assembly.py +++ b/tests/unit/compiler/venom/test_venom_to_assembly.py @@ -13,7 +13,7 @@ def test_dead_params(): """ ctx = parse_venom(code) - asm = VenomCompiler([ctx]).generate_evm() + asm = VenomCompiler(ctx).generate_evm_assembly() assert asm == ["SWAP1", "POP", "JUMP"] @@ -32,5 +32,5 @@ def test_optimistic_swap_params(): """ ctx = parse_venom(code) - asm = VenomCompiler([ctx]).generate_evm() + asm = VenomCompiler(ctx).generate_evm_assembly() assert asm == ["SWAP2", "PUSH1", 117, "POP", "MSTORE", "MSTORE", "JUMP"] From 27b5cf61085a6fdb6bd5173556e1dd9599e986d5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 24 May 2025 17:30:00 +0300 Subject: [PATCH 056/172] update some tests, remove sha3_32 --- tests/unit/compiler/ir/test_repeat.py | 2 +- tests/unit/compiler/test_sha3_32.py | 12 ------------ vyper/compiler/phases.py | 2 +- vyper/ir/compile_ir.py | 15 --------------- vyper/venom/memory_location.py | 2 -- 5 files changed, 2 insertions(+), 31 deletions(-) delete mode 100644 tests/unit/compiler/test_sha3_32.py diff --git a/tests/unit/compiler/ir/test_repeat.py b/tests/unit/compiler/ir/test_repeat.py index e134be087d..f7fe869dea 100644 --- a/tests/unit/compiler/ir/test_repeat.py +++ b/tests/unit/compiler/ir/test_repeat.py @@ -1,5 +1,5 @@ def test_repeat(get_contract_from_ir, assert_compile_failed): - good_ir = ["repeat", 0, 0, 1, 1, ["seq"]] + good_ir = ["repeat", "i", 0, 1, 1, ["seq"]] bad_ir_1 = ["repeat", 0, 0, 0, 0, ["seq"]] bad_ir_2 = 
["repeat", 0, 0, -1, -1, ["seq"]] get_contract_from_ir(good_ir) diff --git a/tests/unit/compiler/test_sha3_32.py b/tests/unit/compiler/test_sha3_32.py deleted file mode 100644 index e1cbf9c843..0000000000 --- a/tests/unit/compiler/test_sha3_32.py +++ /dev/null @@ -1,12 +0,0 @@ -from vyper.codegen.ir_node import IRnode -from vyper.evm.opcodes import version_check -from vyper.ir import compile_ir, optimizer - - -def test_sha3_32(): - ir = ["sha3_32", 0] - evm = ["PUSH1", 0, "PUSH1", 0, "MSTORE", "PUSH1", 32, "PUSH1", 0, "SHA3"] - if version_check(begin="shanghai"): - evm = ["PUSH0", "PUSH0", "MSTORE", "PUSH1", 32, "PUSH0", "SHA3"] - assert compile_ir.compile_to_assembly(IRnode.from_list(ir)) == evm - assert compile_ir.compile_to_assembly(optimizer.optimize(IRnode.from_list(ir))) == evm diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index c7046ca1eb..790d35b02d 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -302,7 +302,7 @@ def bytecode_metadata(self) -> Optional[bytes]: immutables_len = self.compilation_target._metadata["type"].immutable_section_bytes runtime_codesize = len(self.bytecode_runtime) - metadata = self.integrity_sum + metadata = bytes.fromhex(self.integrity_sum) return compile_ir.generate_cbor_metadata( metadata, runtime_codesize, runtime_data_segment_lengths, immutables_len ) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index aa59b4ed49..14f65ea165 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -764,21 +764,6 @@ def _height_of(varname): o.extend(self._assert_false()) return o - # SHA3 a single value - if code.value == "sha3_32": - # TODO: this should not be emitted anymore. 
- o = self._compile_r(code.args[0], height) - o.extend( - [ - *PUSH(MemoryPositions.FREE_VAR_SPACE), - "MSTORE", - *PUSH(32), - *PUSH(MemoryPositions.FREE_VAR_SPACE), - "SHA3", - ] - ) - return o - # SHA3 a 64 byte value if code.value == "sha3_64": o = self._compile_r(code.args[0], height) diff --git a/vyper/venom/memory_location.py b/vyper/venom/memory_location.py index bf7d1fc8d7..ec2a2f9da8 100644 --- a/vyper/venom/memory_location.py +++ b/vyper/venom/memory_location.py @@ -208,8 +208,6 @@ def _get_memory_read_location(inst) -> MemoryLocation: elif opcode == "sha3": size, offset = inst.operands return MemoryLocation.from_operands(offset, size) - elif opcode == "sha3_32": - raise CompilerPanic("invalid opcode") # should be unused elif opcode == "sha3_64": return MemoryLocation(offset=0, size=64) elif opcode == "log": From 6c1a65b861ecb430a1c13ad4951a7354d4fd6924 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 10:20:45 +0300 Subject: [PATCH 057/172] refactor: move getpos to compiler/output.py --- vyper/compiler/output.py | 6 +++++- vyper/ir/compile_ir.py | 5 ----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 569d72ac6d..1d33744407 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -354,6 +354,10 @@ def _build_node_identifier(ast_node): return (ast_node.module_node.source_id, ast_node.node_id) +def _getpos(node): + return (node.lineno, node.col_offset, node.end_lineno, node.end_col_offset) + + def _build_source_map_output(compiler_data, bytecode, pc_maps): """ Generate source map output in various formats. 
Note that integrations @@ -374,7 +378,7 @@ def _build_source_map_output(compiler_data, bytecode, pc_maps): # tag it with source id ast_map[0] = compiler_data.annotated_vyper_module - pc_pos_map = {k: compile_ir.getpos(v) for (k, v) in ast_map.items()} + pc_pos_map = {k: _getpos(v) for (k, v) in ast_map.items()} node_id_map = {k: _build_node_identifier(v) for (k, v) in ast_map.items()} compressed_map = _compress_source_map(ast_map, out["pc_jump_map"], len(bytecode)) out["pc_pos_map_compressed"] = compressed_map diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 14f65ea165..f61a0d480d 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -966,11 +966,6 @@ def _assert_false(self): ############################## -# TODO: move this to some ast file or vyper/compiler/output.py -def getpos(node): - return (node.lineno, node.col_offset, node.end_lineno, node.end_col_offset) - - def note_line_num(line_number_map, pc, item): # Record AST attached to pc if isinstance(item, TaggedInstruction): From 653a3ebc4e759360b3237219c6031de29c067ad8 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 10:25:13 +0300 Subject: [PATCH 058/172] update source map tests runtime source map now only appears in source_map_runtime, not source_map. 
--- tests/unit/compiler/test_source_map.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/unit/compiler/test_source_map.py b/tests/unit/compiler/test_source_map.py index bd5d75a447..055d5666ae 100644 --- a/tests/unit/compiler/test_source_map.py +++ b/tests/unit/compiler/test_source_map.py @@ -33,7 +33,8 @@ def foo(a: uint256) -> int128: def test_jump_map(optimize, experimental_codegen): - source_map = compile_code(TEST_CODE, output_formats=["source_map"])["source_map"] + compiler_output = compile_code(TEST_CODE, output_formats=["source_map_runtime"]) + source_map = compiler_output["source_map_runtime"] pos_map = source_map["pc_pos_map"] jump_map = source_map["pc_jump_map"] @@ -75,7 +76,8 @@ def test_jump_map(optimize, experimental_codegen): def test_pos_map_offsets(): - source_map = compile_code(TEST_CODE, output_formats=["source_map"])["source_map"] + compiler_output = compile_code(TEST_CODE, output_formats=["source_map_runtime"]) + source_map = compiler_output["source_map_runtime"] expanded = expand_source_map(source_map["pc_pos_map_compressed"]) pc_iter = iter(source_map["pc_pos_map"][i] for i in sorted(source_map["pc_pos_map"])) @@ -105,7 +107,9 @@ def test_error_map(experimental_codegen): def update_foo(): self.foo += 1 """ - error_map = compile_code(code, output_formats=["source_map"])["source_map"]["error_map"] + compiler_output = compile_code(code, output_formats=["source_map_runtime"]) + error_map = compiler_output["source_map_runtime"]["error_map"] + assert "safeadd" in error_map.values() if experimental_codegen: @@ -121,7 +125,8 @@ def test_error_map_with_user_error(): def foo(): raise "some error" """ - error_map = compile_code(code, output_formats=["source_map"])["source_map"]["error_map"] + compiler_output = compile_code(code, output_formats=["source_map_runtime"]) + error_map = compiler_output["source_map_runtime"]["error_map"] assert "user revert with reason" in error_map.values() @@ -132,7 +137,8 
@@ def foo(i: uint256): a: DynArray[uint256, 10] = [1] a[i % 10] = 2 """ - error_map = compile_code(code, output_formats=["source_map"])["source_map"]["error_map"] + compiler_output = compile_code(code, output_formats=["source_map_runtime"]) + error_map = compiler_output["source_map_runtime"]["error_map"] assert "safemod" in error_map.values() @@ -147,7 +153,8 @@ def bar(i: uint256) -> String[85]: # ensure the mod doesn't get erased return concat("foo foo", uint2str(i)) """ - error_map = compile_code(code, output_formats=["source_map"])["source_map"]["error_map"] + compiler_output= compile_code(code, output_formats=["source_map_runtime"]) + error_map = compiler_output["source_map_runtime"]["error_map"] assert "user revert with reason" in error_map.values() assert "safemod" in error_map.values() @@ -196,10 +203,11 @@ def _construct_node_id_map(ast_struct): def test_node_id_map(): code = TEST_CODE - out = compile_code(code, output_formats=["annotated_ast_dict", "source_map", "ir"]) - assert out["source_map"]["pc_ast_map_item_keys"] == ("source_id", "node_id") + out = compile_code(code, output_formats=["annotated_ast_dict", "source_map_runtime", "ir"]) + source_map = out["source_map_runtime"] + assert source_map["pc_ast_map_item_keys"] == ("source_id", "node_id") - pc_ast_map = out["source_map"]["pc_ast_map"] + pc_ast_map = source_map["pc_ast_map"] ast_node_map = _construct_node_id_map(out["annotated_ast_dict"]) From b078d66a7bcbfd6481600670085c35429aae71fe Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 10:30:20 +0300 Subject: [PATCH 059/172] update asm optimizer tests --- tests/unit/compiler/asm/test_asm_optimizer.py | 20 +++++++++---------- tests/unit/compiler/test_source_map.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/unit/compiler/asm/test_asm_optimizer.py b/tests/unit/compiler/asm/test_asm_optimizer.py index dfbb53ad5a..fff8a961bf 100644 --- a/tests/unit/compiler/asm/test_asm_optimizer.py +++ 
b/tests/unit/compiler/asm/test_asm_optimizer.py @@ -3,7 +3,7 @@ from vyper.compiler import compile_code from vyper.compiler.phases import CompilerData from vyper.compiler.settings import OptimizationLevel, Settings -from vyper.ir.compile_ir import _merge_jumpdests +from vyper.ir.compile_ir import PUSHLABEL, Label, _merge_jumpdests codes = [ """ @@ -82,18 +82,18 @@ def test_dead_code_eliminator(code): c = CompilerData(code, settings=Settings(optimize=OptimizationLevel.NONE)) # get the labels - initcode_asm = [i for i in c.assembly if isinstance(i, str)] - runtime_asm = [i for i in c.assembly_runtime if isinstance(i, str)] + initcode_labels = [i for i in c.assembly if isinstance(i, Label)] + runtime_labels = [i for i in c.assembly_runtime if isinstance(i, Label)] ctor_only = "ctor_only()" runtime_only = "runtime_only()" # qux reachable from unoptimized initcode, foo not reachable. - assert any(ctor_only in instr for instr in initcode_asm) - assert all(runtime_only not in instr for instr in initcode_asm) + assert any(ctor_only in label.label for label in initcode_labels) + assert all(runtime_only not in label.label for label in initcode_labels) - assert any(runtime_only in instr for instr in runtime_asm) - assert all(ctor_only not in instr for instr in runtime_asm) + assert any(runtime_only in label.label for label in runtime_labels) + assert all(ctor_only not in label.label for label in runtime_labels) def test_library_code_eliminator(make_input_bundle, experimental_codegen): @@ -118,8 +118,8 @@ def foo(): library.some_function() """ input_bundle = make_input_bundle({"library.vy": library}) - res = compile_code(code, input_bundle=input_bundle, output_formats=["asm"]) - asm = res["asm"] + res = compile_code(code, input_bundle=input_bundle, output_formats=["asm_runtime"]) + asm = res["asm_runtime"] if not experimental_codegen: assert "some_function()" in asm # Venom function inliner will remove this @@ -129,6 +129,6 @@ def foo(): def test_merge_jumpdests(): - asm = 
["_sym_label_0", "JUMP", "PUSH0", "_sym_label_0", "JUMPDEST", "_sym_label_0", "JUMPDEST"] + asm = [PUSHLABEL(Label("label_0")), "JUMP", "PUSH0", Label("label_0"), Label("_label_0")] assert _merge_jumpdests(asm) is False, "should not return True as no changes were made" diff --git a/tests/unit/compiler/test_source_map.py b/tests/unit/compiler/test_source_map.py index 055d5666ae..0272ea9044 100644 --- a/tests/unit/compiler/test_source_map.py +++ b/tests/unit/compiler/test_source_map.py @@ -153,7 +153,7 @@ def bar(i: uint256) -> String[85]: # ensure the mod doesn't get erased return concat("foo foo", uint2str(i)) """ - compiler_output= compile_code(code, output_formats=["source_map_runtime"]) + compiler_output = compile_code(code, output_formats=["source_map_runtime"]) error_map = compiler_output["source_map_runtime"]["error_map"] assert "user revert with reason" in error_map.values() assert "safemod" in error_map.values() From b9f9f429e1d8460554e8309a194c1041daff0981 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 10:57:28 +0300 Subject: [PATCH 060/172] fix test - craft assembly directly --- .../builtins/codegen/test_create_functions.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/functional/builtins/codegen/test_create_functions.py b/tests/functional/builtins/codegen/test_create_functions.py index 085d012759..2cfcfd25cf 100644 --- a/tests/functional/builtins/codegen/test_create_functions.py +++ b/tests/functional/builtins/codegen/test_create_functions.py @@ -5,9 +5,8 @@ import vyper.ir.compile_ir as compile_ir from tests.utils import ZERO_ADDRESS -from vyper.codegen.ir_node import IRnode from vyper.compiler import compile_code -from vyper.compiler.settings import OptimizationLevel +from vyper.ir.compile_ir import PUSH, PUSHLABEL, Label, DataHeader, DATA_ITEM from vyper.utils import EIP_170_LIMIT, ERC5202_PREFIX, checksum_encode, keccak256 @@ -295,10 +294,19 @@ def test(code_ofst: uint256) -> 
address: # deploy a blueprint contract whose contained initcode contains only # zeroes (so no matter which offset, create_from_blueprint will # return empty code) - ir = IRnode.from_list(["deploy", 0, ["seq"] + ["stop"] * initcode_len, 0]) - bytecode, _ = compile_ir.assembly_to_evm( - compile_ir.compile_to_assembly(ir, optimize=OptimizationLevel.NONE) - ) + asm = [ + *PUSH(initcode_len), + PUSHLABEL(Label("end")), + *PUSH(0), + "CODECOPY", + *PUSH(initcode_len), + *PUSH(0), + "RETURN", + DataHeader(Label("end")), + DATA_ITEM(b"\x00" * initcode_len), + ] + bytecode, _ = compile_ir.assembly_to_evm(asm) + # manually deploy the bytecode c = env.deploy(abi=[], bytecode=bytecode) blueprint_address = c.address From 8421c670ffae9cf2be649a68ca12cfe3358d63e7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 11:03:45 +0300 Subject: [PATCH 061/172] fix off by one --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index f61a0d480d..0b6a21a8fe 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -477,7 +477,7 @@ def _height_of(varname): # codecopy 32 bytes to FREE_VAR_SPACE, then mload from FREE_VAR_SPACE o.extend(PUSH(32)) - o.extend(self._data_ofst_of(Label("code_end"), loc, height)) + o.extend(self._data_ofst_of(Label("code_end"), loc, height + 1)) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["CODECOPY"]) o.extend(PUSH(MemoryPositions.FREE_VAR_SPACE) + ["MLOAD"]) From 9994490645f9b5602194039726de272ed8ec4fca Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 11:25:11 +0300 Subject: [PATCH 062/172] update more tests --- tests/functional/builtins/codegen/test_create_functions.py | 2 +- tests/functional/codegen/test_selector_table_stability.py | 7 ++++--- tests/functional/venom/test_venom_label_variables.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/functional/builtins/codegen/test_create_functions.py 
b/tests/functional/builtins/codegen/test_create_functions.py index 2cfcfd25cf..3b7200f218 100644 --- a/tests/functional/builtins/codegen/test_create_functions.py +++ b/tests/functional/builtins/codegen/test_create_functions.py @@ -6,7 +6,7 @@ import vyper.ir.compile_ir as compile_ir from tests.utils import ZERO_ADDRESS from vyper.compiler import compile_code -from vyper.ir.compile_ir import PUSH, PUSHLABEL, Label, DataHeader, DATA_ITEM +from vyper.ir.compile_ir import DATA_ITEM, PUSH, PUSHLABEL, DataHeader, Label from vyper.utils import EIP_170_LIMIT, ERC5202_PREFIX, checksum_encode, keccak256 diff --git a/tests/functional/codegen/test_selector_table_stability.py b/tests/functional/codegen/test_selector_table_stability.py index 82955ab5e1..a1f58d6852 100644 --- a/tests/functional/codegen/test_selector_table_stability.py +++ b/tests/functional/codegen/test_selector_table_stability.py @@ -9,13 +9,14 @@ def test_dense_jumptable_stability(): code = "\n".join(f"@external\ndef {name}():\n pass" for name in function_names) output = compile_code( - code, output_formats=["asm"], settings=Settings(optimize=OptimizationLevel.CODESIZE) + code, output_formats=["asm_runtime"], settings=Settings(optimize=OptimizationLevel.CODESIZE) ) # test that the selector table data is stable across different runs # (xdist should provide different PYTHONHASHSEEDs). 
- expected_asm = """{ DATA BUCKET_HEADERS b\'\\x0bB\' LABEL bucket_0 b\'\\n\' b\'+\\x8d\' LABEL bucket_1 b\'\\x0c\' b\'\\x00\\x85\' LABEL bucket_2 b\'\\x08\' } { DATA bucket_1 b\'\\xd8\\xee\\xa1\\xe8\' LABEL external 6 foo6()3639517672 b\'\\x05\' b\'\\xd2\\x9e\\xe0\\xf9\' LABEL external 0 foo0()3533627641 b\'\\x05\' b\'\\x05\\xf1\\xe0_\' LABEL external 2 foo2()99737695 b\'\\x05\' b\'\\x91\\t\\xb4{\' LABEL external 23 foo23()2433332347 b\'\\x05\' b\'np3\\x7f\' LABEL external 11 foo11()1852846975 b\'\\x05\' b\'&\\xf5\\x96\\xf9\' LABEL external 13 foo13()653629177 b\'\\x05\' b\'\\x04ga\\xeb\' LABEL external 14 foo14()73884139 b\'\\x05\' b\'\\x89\\x06\\xad\\xc6\' LABEL external 17 foo17()2298916294 b\'\\x05\' b\'\\xe4%\\xac\\xd1\' LABEL external 4 foo4()3827674321 b\'\\x05\' b\'yj\\x01\\xac\' LABEL external 7 foo7()2036990380 b\'\\x05\' b\'\\xf1\\xe6K\\xe5\' LABEL external 29 foo29()4058401765 b\'\\x05\' b\'\\xd2\\x89X\\xb8\' LABEL external 3 foo3()3532216504 b\'\\x05\' } { DATA bucket_2 b\'\\x06p\\xffj\' LABEL external 25 foo25()108068714 b\'\\x05\' b\'\\x964\\x99I\' LABEL external 24 foo24()2520029513 b\'\\x05\' b\'s\\x81\\xe7\\xc1\' LABEL external 10 foo10()1937893313 b\'\\x05\' b\'\\x85\\xad\\xc11\' LABEL external 28 foo28()2242756913 b\'\\x05\' b\'\\xfa"\\xb1\\xed\' LABEL external 5 foo5()4196577773 b\'\\x05\' b\'A\\xe7[\\x05\' LABEL external 22 foo22()1105681157 b\'\\x05\' b\'\\xd3\\x89U\\xe8\' LABEL external 1 foo1()3548993000 b\'\\x05\' b\'hL\\xf8\\xf3\' LABEL external 20 foo20()1749874931 b\'\\x05\' } { DATA bucket_0 b\'\\xee\\xd9\\x1d\\xe3\' LABEL external 9 foo9()4007206371 b\'\\x05\' b\'a\\xbc\\x1ch\' LABEL external 16 foo16()1639717992 b\'\\x05\' b\'\\xd3*\\xa7\\x0c\' LABEL external 21 foo21()3542787852 b\'\\x05\' b\'\\x18iG\\xd9\' LABEL external 19 foo19()409552857 b\'\\x05\' b\'\\n\\xf1\\xf9\\x7f\' LABEL external 18 foo18()183630207 b\'\\x05\' b\')\\xda\\xd7`\' LABEL external 27 foo27()702207840 b\'\\x05\' b\'2\\xf6\\xaa\\xda\' LABEL external 12 
foo12()855026394 b\'\\x05\' b\'\\xbe\\xb5\\x05\\xf5\' LABEL external 15 foo15()3199534581 b\'\\x05\' b\'\\xfc\\xa7_\\xe6\' LABEL external 8 foo8()4238827494 b\'\\x05\' b\'\\x1b\\x12C8\' LABEL external 26 foo26()454181688 b\'\\x05\' } }""" # noqa: E501, FS003 - assert expected_asm in output["asm"] + expected_asm = """DATA BUCKET_HEADERS:\n DATABYTES 0b42\n DATALABEL bucket_0\n DATABYTES 0a\n DATABYTES 2b8d\n DATALABEL bucket_1\n DATABYTES 0c\n DATABYTES 0085\n DATALABEL bucket_2\n DATABYTES 08\n\nDATA bucket_1:\n DATABYTES d8eea1e8\n DATALABEL external 6 foo6()3639517672\n DATABYTES 05\n DATABYTES d29ee0f9\n DATALABEL external 0 foo0()3533627641\n DATABYTES 05\n DATABYTES 05f1e05f\n DATALABEL external 2 foo2()99737695\n DATABYTES 05\n DATABYTES 9109b47b\n DATALABEL external 23 foo23()2433332347\n DATABYTES 05\n DATABYTES 6e70337f\n DATALABEL external 11 foo11()1852846975\n DATABYTES 05\n DATABYTES 26f596f9\n DATALABEL external 13 foo13()653629177\n DATABYTES 05\n DATABYTES 046761eb\n DATALABEL external 14 foo14()73884139\n DATABYTES 05\n DATABYTES 8906adc6\n DATALABEL external 17 foo17()2298916294\n DATABYTES 05\n DATABYTES e425acd1\n DATALABEL external 4 foo4()3827674321\n DATABYTES 05\n DATABYTES 796a01ac\n DATALABEL external 7 foo7()2036990380\n DATABYTES 05\n DATABYTES f1e64be5\n DATALABEL external 29 foo29()4058401765\n DATABYTES 05\n DATABYTES d28958b8\n DATALABEL external 3 foo3()3532216504\n DATABYTES 05\n\nDATA bucket_2:\n DATABYTES 0670ff6a\n DATALABEL external 25 foo25()108068714\n DATABYTES 05\n DATABYTES 96349949\n DATALABEL external 24 foo24()2520029513\n DATABYTES 05\n DATABYTES 7381e7c1\n DATALABEL external 10 foo10()1937893313\n DATABYTES 05\n DATABYTES 85adc131\n DATALABEL external 28 foo28()2242756913\n DATABYTES 05\n DATABYTES fa22b1ed\n DATALABEL external 5 foo5()4196577773\n DATABYTES 05\n DATABYTES 41e75b05\n DATALABEL external 22 foo22()1105681157\n DATABYTES 05\n DATABYTES d38955e8\n DATALABEL external 1 foo1()3548993000\n DATABYTES 05\n 
DATABYTES 684cf8f3\n DATALABEL external 20 foo20()1749874931\n DATABYTES 05\n\nDATA bucket_0:\n DATABYTES eed91de3\n DATALABEL external 9 foo9()4007206371\n DATABYTES 05\n DATABYTES 61bc1c68\n DATALABEL external 16 foo16()1639717992\n DATABYTES 05\n DATABYTES d32aa70c\n DATALABEL external 21 foo21()3542787852\n DATABYTES 05\n DATABYTES 186947d9\n DATALABEL external 19 foo19()409552857\n DATABYTES 05\n DATABYTES 0af1f97f\n DATALABEL external 18 foo18()183630207\n DATABYTES 05\n DATABYTES 29dad760\n DATALABEL external 27 foo27()702207840\n DATABYTES 05\n DATABYTES 32f6aada\n DATALABEL external 12 foo12()855026394\n DATABYTES 05\n DATABYTES beb505f5\n DATALABEL external 15 foo15()3199534581\n DATABYTES 05\n DATABYTES fca75fe6\n DATALABEL external 8 foo8()4238827494\n DATABYTES 05\n DATABYTES 1b124338\n DATALABEL external 26 foo26()454181688\n DATABYTES 05""" # noqa: E501 + + assert expected_asm in output["asm_runtime"] def test_sparse_jumptable_stability(): diff --git a/tests/functional/venom/test_venom_label_variables.py b/tests/functional/venom/test_venom_label_variables.py index ac101d7039..0f34f073d6 100644 --- a/tests/functional/venom/test_venom_label_variables.py +++ b/tests/functional/venom/test_venom_label_variables.py @@ -82,4 +82,4 @@ def test_labels_as_variables(): run_passes_on(ctx, OptimizationLevel.default()) asm = generate_assembly_experimental(ctx) - generate_bytecode(asm, compiler_metadata=None) + generate_bytecode(asm) From f058140b04f784a21f772e576bc9ed1064329598 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 11:32:51 +0300 Subject: [PATCH 063/172] update another test --- tests/functional/codegen/features/test_init.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/functional/codegen/features/test_init.py b/tests/functional/codegen/features/test_init.py index 84d224f632..1e37b89701 100644 --- a/tests/functional/codegen/features/test_init.py +++ 
b/tests/functional/codegen/features/test_init.py @@ -15,13 +15,13 @@ def __init__(a: uint256): assert c.val() == 123 # Make sure the init code does not access calldata - assembly = vyper.compile_code(code, output_formats=["asm"])["asm"].split(" ") - ir_return_idx_start = assembly.index("{") - ir_return_idx_end = assembly.index("}") + compiler_output = vyper.compile_code(code, output_formats=["asm", "asm_runtime"]) + asm_deploytime = compiler_output["asm"] + asm_runtime = compiler_output["asm_runtime"] - assert "CALLDATALOAD" in assembly - assert "CALLDATACOPY" not in assembly[:ir_return_idx_start] + assembly[ir_return_idx_end:] - assert "CALLDATALOAD" not in assembly[:ir_return_idx_start] + assembly[ir_return_idx_end:] + assert "CALLDATALOAD" in asm_runtime + assert "CALLDATACOPY" not in asm_deploytime + assert "CALLDATALOAD" not in asm_deploytime def test_init_calls_internal(get_contract, assert_compile_failed, tx_failed): From 395189a15f901422ce68022369041bf2c1c615c7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 11:34:47 +0300 Subject: [PATCH 064/172] update more tests --- tests/functional/venom/test_venom_repr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/venom/test_venom_repr.py b/tests/functional/venom/test_venom_repr.py index d08f71c2b9..18300fcaae 100644 --- a/tests/functional/venom/test_venom_repr.py +++ b/tests/functional/venom/test_venom_repr.py @@ -104,7 +104,7 @@ def _helper1(vyper_source, optimize): # test we can generate assembly+bytecode asm = generate_assembly_experimental(ctx) - generate_bytecode(asm, compiler_metadata=None) + generate_bytecode(asm) def _helper2(vyper_source, optimize, compiler_settings): @@ -126,7 +126,7 @@ def _helper2(vyper_source, optimize, compiler_settings): # test we can generate assembly+bytecode asm = generate_assembly_experimental(ctx, optimize=optimize) - bytecode = generate_bytecode(asm, compiler_metadata=None) + bytecode = generate_bytecode(asm) out = 
compile_code(vyper_source, settings=settings, output_formats=["bytecode_runtime"]) assert "0x" + bytecode.hex() == out["bytecode_runtime"] From 8712219d0c7377b769285071d6b27ea9af742ab5 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 25 May 2025 11:36:38 +0300 Subject: [PATCH 065/172] fix signature --- tests/functional/venom/test_venom_repr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/venom/test_venom_repr.py b/tests/functional/venom/test_venom_repr.py index 18300fcaae..c8bfc16229 100644 --- a/tests/functional/venom/test_venom_repr.py +++ b/tests/functional/venom/test_venom_repr.py @@ -126,7 +126,7 @@ def _helper2(vyper_source, optimize, compiler_settings): # test we can generate assembly+bytecode asm = generate_assembly_experimental(ctx, optimize=optimize) - bytecode = generate_bytecode(asm) + bytecode, _ = generate_bytecode(asm) out = compile_code(vyper_source, settings=settings, output_formats=["bytecode_runtime"]) assert "0x" + bytecode.hex() == out["bytecode_runtime"] From 11c82cfd64841a591efe5ecbca8be00a8e6b1871 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 14:19:06 +0300 Subject: [PATCH 066/172] test const add --- tests/unit/compiler/ir/test_compile_ir.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/unit/compiler/ir/test_compile_ir.py b/tests/unit/compiler/ir/test_compile_ir.py index ba85297afb..accd5903ce 100644 --- a/tests/unit/compiler/ir/test_compile_ir.py +++ b/tests/unit/compiler/ir/test_compile_ir.py @@ -3,6 +3,7 @@ from vyper.codegen.ir_node import IRnode from vyper.evm.opcodes import version_check from vyper.ir import compile_ir +from vyper.ir.compile_ir import CONSTREF, CONST, CONST_ADD from vyper.ir.s_expressions import parse_s_exp fail_list = [ @@ -75,3 +76,14 @@ def test_pc_debugger(): offset = 5 assert line_number_map["pc_breakpoints"][0] == offset + +def test_const_add(): + asm = [ + CONST("a", 1), + CONST("b", 2), + CONST_ADD("c", "a", "b"), + 
CONST_ADD("d", "c", 10), + ] + const_map = compile_ir._resolve_constants(asm, {}) + assert const_map[CONSTREF("c")] == 3 + assert const_map[CONSTREF("d")] == 13 \ No newline at end of file From e4ff38b2015e80ef1f11e9df19cc2d579e9a5cf9 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 14:19:38 +0300 Subject: [PATCH 067/172] const add implementation wip --- vyper/ir/compile_ir.py | 66 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 0b6a21a8fe..f2e3c2e0b2 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -110,6 +110,22 @@ def __eq__(self, other): return False return self.name == other.name and self.value == other.value +class CONST_ADD: + def __init__(self, name: str, op1: str | int, op2: str | int): + assert isinstance(name, str) + assert isinstance(op1, (str, int)) + assert isinstance(op2, (str, int)) + self.name = name + self.op1 = op1 + self.op2 = op2 + + def __repr__(self): + return f"CONST_ADD {self.name} {self.op1} {self.op2}" + + def __eq__(self, other): + if not isinstance(other, CONST_ADD): + return False + return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 class PUSHLABEL: def __init__(self, label: Label): @@ -1302,6 +1318,48 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st bytecode = _assembly_to_evm(assembly, symbol_map, const_map) return bytecode, source_map +def _resolve_constants(assembly: list[AssemblyInstruction], const_map: dict[CONSTREF, int]) -> dict[CONSTREF, int]: + for item in assembly: + if isinstance(item, CONST): + _add_to_symbol_map(const_map, CONSTREF(item.name), item.value) + + while True: + changed = False + for item in assembly: + if isinstance(item, CONST_ADD): + # Skip if this constant is already resolved + if CONSTREF(item.name) in const_map: + continue + + # Get values for both operands + op1_val = None + op2_val = None 
+ + # Try to resolve op1 + if isinstance(item.op1, str): + op1_ref = CONSTREF(item.op1) + if op1_ref in const_map: + op1_val = const_map[op1_ref] + elif isinstance(item.op1, int): + op1_val = item.op1 + + # Try to resolve op2 + if isinstance(item.op2, str): + op2_ref = CONSTREF(item.op2) + if op2_ref in const_map: + op2_val = const_map[op2_ref] + elif isinstance(item.op2, int): + op2_val = item.op2 + + # If both operands are resolved, add the result + if op1_val is not None and op2_val is not None: + _add_to_symbol_map(const_map, CONSTREF(item.name), op1_val + op2_val) + changed = True + + if not changed: + break + + return const_map # resolve symbols in assembly def resolve_symbols( @@ -1328,11 +1386,9 @@ def resolve_symbols( pc: int = 0 - # resolve constants - for item in assembly: - if isinstance(item, CONST): - # should this be merged into the symbol map? - _add_to_symbol_map(const_map, CONSTREF(item.name), item.value) + const_map = _resolve_constants(assembly, const_map) + print("\n".join(str(item) for item in assembly)) + print(const_map) # resolve labels (i.e. JUMPDEST locations) to actual code locations, # and simultaneously build the source map. 
From 6acd1707f81c7528dd275fd8e50d45ed6a271e26 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 14:23:19 +0300 Subject: [PATCH 068/172] lint --- tests/unit/compiler/ir/test_compile_ir.py | 12 ++-- vyper/ir/compile_ir.py | 67 ++++++++++++++--------- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/tests/unit/compiler/ir/test_compile_ir.py b/tests/unit/compiler/ir/test_compile_ir.py index accd5903ce..eb20d987e2 100644 --- a/tests/unit/compiler/ir/test_compile_ir.py +++ b/tests/unit/compiler/ir/test_compile_ir.py @@ -3,7 +3,7 @@ from vyper.codegen.ir_node import IRnode from vyper.evm.opcodes import version_check from vyper.ir import compile_ir -from vyper.ir.compile_ir import CONSTREF, CONST, CONST_ADD +from vyper.ir.compile_ir import CONST, CONST_ADD, CONSTREF from vyper.ir.s_expressions import parse_s_exp fail_list = [ @@ -77,13 +77,9 @@ def test_pc_debugger(): assert line_number_map["pc_breakpoints"][0] == offset + def test_const_add(): - asm = [ - CONST("a", 1), - CONST("b", 2), - CONST_ADD("c", "a", "b"), - CONST_ADD("d", "c", 10), - ] + asm = [CONST("a", 1), CONST("b", 2), CONST_ADD("c", "a", "b"), CONST_ADD("d", "c", 10)] const_map = compile_ir._resolve_constants(asm, {}) assert const_map[CONSTREF("c")] == 3 - assert const_map[CONSTREF("d")] == 13 \ No newline at end of file + assert const_map[CONSTREF("d")] == 13 diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index f2e3c2e0b2..d8ab82bf27 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -110,6 +110,7 @@ def __eq__(self, other): return False return self.name == other.name and self.value == other.value + class CONST_ADD: def __init__(self, name: str, op1: str | int, op2: str | int): assert isinstance(name, str) @@ -121,12 +122,40 @@ def __init__(self, name: str, op1: str | int, op2: str | int): def __repr__(self): return f"CONST_ADD {self.name} {self.op1} {self.op2}" - + def __eq__(self, other): if not isinstance(other, CONST_ADD): return False 
return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 + def calculate(self, const_map: dict[CONSTREF, int]) -> int | None: + # Get values for both operands + op1_val = None + op2_val = None + + # Try to resolve op1 + if isinstance(self.op1, str): + op1_ref = CONSTREF(self.op1) + if op1_ref in const_map: + op1_val = const_map[op1_ref] + elif isinstance(self.op1, int): + op1_val = self.op1 + + # Try to resolve op2 + if isinstance(self.op2, str): + op2_ref = CONSTREF(self.op2) + if op2_ref in const_map: + op2_val = const_map[op2_ref] + elif isinstance(self.op2, int): + op2_val = self.op2 + + # If both operands are resolved, return their sum + if op1_val is not None and op2_val is not None: + return op1_val + op2_val + + return None + + class PUSHLABEL: def __init__(self, label: Label): assert isinstance(label, Label), label @@ -1318,7 +1347,10 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st bytecode = _assembly_to_evm(assembly, symbol_map, const_map) return bytecode, source_map -def _resolve_constants(assembly: list[AssemblyInstruction], const_map: dict[CONSTREF, int]) -> dict[CONSTREF, int]: + +def _resolve_constants( + assembly: list[AssemblyInstruction], const_map: dict[CONSTREF, int] +) -> dict[CONSTREF, int]: for item in assembly: if isinstance(item, CONST): _add_to_symbol_map(const_map, CONSTREF(item.name), item.value) @@ -1330,37 +1362,18 @@ def _resolve_constants(assembly: list[AssemblyInstruction], const_map: dict[CONS # Skip if this constant is already resolved if CONSTREF(item.name) in const_map: continue - - # Get values for both operands - op1_val = None - op2_val = None - - # Try to resolve op1 - if isinstance(item.op1, str): - op1_ref = CONSTREF(item.op1) - if op1_ref in const_map: - op1_val = const_map[op1_ref] - elif isinstance(item.op1, int): - op1_val = item.op1 - - # Try to resolve op2 - if isinstance(item.op2, str): - op2_ref = CONSTREF(item.op2) - if op2_ref in const_map: - op2_val 
= const_map[op2_ref] - elif isinstance(item.op2, int): - op2_val = item.op2 - - # If both operands are resolved, add the result - if op1_val is not None and op2_val is not None: - _add_to_symbol_map(const_map, CONSTREF(item.name), op1_val + op2_val) + + # Calculate the value if possible + if (value := item.calculate(const_map)) is not None: + _add_to_symbol_map(const_map, CONSTREF(item.name), value) changed = True - + if not changed: break return const_map + # resolve symbols in assembly def resolve_symbols( assembly: list[AssemblyInstruction], From 42423934a24d5103a269b8afabde1164cd977562 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 14:34:55 +0300 Subject: [PATCH 069/172] max and refactor --- tests/unit/compiler/ir/test_compile_ir.py | 8 ++- vyper/ir/compile_ir.py | 64 +++++++++++++---------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/tests/unit/compiler/ir/test_compile_ir.py b/tests/unit/compiler/ir/test_compile_ir.py index eb20d987e2..c29871fda6 100644 --- a/tests/unit/compiler/ir/test_compile_ir.py +++ b/tests/unit/compiler/ir/test_compile_ir.py @@ -3,7 +3,7 @@ from vyper.codegen.ir_node import IRnode from vyper.evm.opcodes import version_check from vyper.ir import compile_ir -from vyper.ir.compile_ir import CONST, CONST_ADD, CONSTREF +from vyper.ir.compile_ir import CONST, CONST_ADD, CONST_MAX, CONSTREF from vyper.ir.s_expressions import parse_s_exp fail_list = [ @@ -83,3 +83,9 @@ def test_const_add(): const_map = compile_ir._resolve_constants(asm, {}) assert const_map[CONSTREF("c")] == 3 assert const_map[CONSTREF("d")] == 13 + +def test_const_max(): + asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] + const_map = compile_ir._resolve_constants(asm, {}) + assert const_map[CONSTREF("c")] == 2 + assert const_map[CONSTREF("d")] == 10 \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index d8ab82bf27..9aa6906aef 100644 --- 
a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -111,7 +111,7 @@ def __eq__(self, other): return self.name == other.name and self.value == other.value -class CONST_ADD: +class BaseConstOp: def __init__(self, name: str, op1: str | int, op2: str | int): assert isinstance(name, str) assert isinstance(op1, (str, int)) @@ -120,41 +120,47 @@ def __init__(self, name: str, op1: str | int, op2: str | int): self.op1 = op1 self.op2 = op2 - def __repr__(self): - return f"CONST_ADD {self.name} {self.op1} {self.op2}" - def __eq__(self, other): - if not isinstance(other, CONST_ADD): + if not isinstance(other, type(self)): return False return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 + def _resolve_operand(self, operand: str | int, const_map: dict[CONSTREF, int]) -> int | None: + if isinstance(operand, str): + op_ref = CONSTREF(operand) + if op_ref in const_map: + return const_map[op_ref] + elif isinstance(operand, int): + return operand + return None + def calculate(self, const_map: dict[CONSTREF, int]) -> int | None: - # Get values for both operands - op1_val = None - op2_val = None - - # Try to resolve op1 - if isinstance(self.op1, str): - op1_ref = CONSTREF(self.op1) - if op1_ref in const_map: - op1_val = const_map[op1_ref] - elif isinstance(self.op1, int): - op1_val = self.op1 - - # Try to resolve op2 - if isinstance(self.op2, str): - op2_ref = CONSTREF(self.op2) - if op2_ref in const_map: - op2_val = const_map[op2_ref] - elif isinstance(self.op2, int): - op2_val = self.op2 - - # If both operands are resolved, return their sum - if op1_val is not None and op2_val is not None: - return op1_val + op2_val + op1_val = self._resolve_operand(self.op1, const_map) + op2_val = self._resolve_operand(self.op2, const_map) + if op1_val is not None and op2_val is not None: + return self._apply_operation(op1_val, op2_val) return None + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + raise NotImplementedError("Subclasses must 
implement _apply_operation") + + +class CONST_ADD(BaseConstOp): + def __repr__(self): + return f"CONST_ADD {self.name} {self.op1} {self.op2}" + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + return op1_val + op2_val + + +class CONST_MAX(BaseConstOp): + def __repr__(self): + return f"CONST_MAX {self.name} {self.op1} {self.op2}" + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + return max(op1_val, op2_val) + class PUSHLABEL: def __init__(self, label: Label): @@ -1358,7 +1364,7 @@ def _resolve_constants( while True: changed = False for item in assembly: - if isinstance(item, CONST_ADD): + if isinstance(item, (CONST_ADD, CONST_MAX)): # Skip if this constant is already resolved if CONSTREF(item.name) in const_map: continue From dbb82c569522e4a960a37afc5b1876f98015ee62 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 15:12:09 +0300 Subject: [PATCH 070/172] wip --- tests/unit/compiler/asm/test_asm_optimizer.py | 3 +- tests/unit/compiler/ir/test_compile_ir.py | 6 +- vyper/evm/assembler.py | 550 ++++++++++++ vyper/evm/optimizer.py | 249 ++++++ vyper/ir/compile_ir.py | 807 +----------------- vyper/venom/__init__.py | 2 +- vyper/venom/venom_to_assembly.py | 4 +- 7 files changed, 809 insertions(+), 812 deletions(-) create mode 100644 vyper/evm/assembler.py create mode 100644 vyper/evm/optimizer.py diff --git a/tests/unit/compiler/asm/test_asm_optimizer.py b/tests/unit/compiler/asm/test_asm_optimizer.py index fff8a961bf..8b21b02583 100644 --- a/tests/unit/compiler/asm/test_asm_optimizer.py +++ b/tests/unit/compiler/asm/test_asm_optimizer.py @@ -3,7 +3,8 @@ from vyper.compiler import compile_code from vyper.compiler.phases import CompilerData from vyper.compiler.settings import OptimizationLevel, Settings -from vyper.ir.compile_ir import PUSHLABEL, Label, _merge_jumpdests +from vyper.evm.assembler import PUSHLABEL, Label +from vyper.evm.optimizer import _merge_jumpdests codes = [ """ diff --git 
a/tests/unit/compiler/ir/test_compile_ir.py b/tests/unit/compiler/ir/test_compile_ir.py index c29871fda6..223d226272 100644 --- a/tests/unit/compiler/ir/test_compile_ir.py +++ b/tests/unit/compiler/ir/test_compile_ir.py @@ -3,7 +3,7 @@ from vyper.codegen.ir_node import IRnode from vyper.evm.opcodes import version_check from vyper.ir import compile_ir -from vyper.ir.compile_ir import CONST, CONST_ADD, CONST_MAX, CONSTREF +from vyper.evm.assembler import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants from vyper.ir.s_expressions import parse_s_exp fail_list = [ @@ -80,12 +80,12 @@ def test_pc_debugger(): def test_const_add(): asm = [CONST("a", 1), CONST("b", 2), CONST_ADD("c", "a", "b"), CONST_ADD("d", "c", 10)] - const_map = compile_ir._resolve_constants(asm, {}) + const_map = _resolve_constants(asm, {}) assert const_map[CONSTREF("c")] == 3 assert const_map[CONSTREF("d")] == 13 def test_const_max(): asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] - const_map = compile_ir._resolve_constants(asm, {}) + const_map = _resolve_constants(asm, {}) assert const_map[CONSTREF("c")] == 2 assert const_map[CONSTREF("d")] == 10 \ No newline at end of file diff --git a/vyper/evm/assembler.py b/vyper/evm/assembler.py new file mode 100644 index 0000000000..1d14258da4 --- /dev/null +++ b/vyper/evm/assembler.py @@ -0,0 +1,550 @@ + +##################################### +# assembly data structures and utils +##################################### + + +from dataclasses import dataclass +from typing import Any, TypeVar + +from vyper.evm.opcodes import get_opcodes, version_check +from vyper.exceptions import CompilerPanic +from vyper.utils import OrderedSet + +PUSH_OFFSET = 0x5F +DUP_OFFSET = 0x7F +SWAP_OFFSET = 0x8F + + +def num_to_bytearray(x): + o = [] + while x > 0: + o.insert(0, x % 256) + x //= 256 + return o + + +class Label: + def __init__(self, label: str): + assert isinstance(label, str) + self.label = label + + def __repr__(self): 
+ return f"LABEL {self.label}" + + def __eq__(self, other): + if not isinstance(other, Label): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + + +@dataclass +class DataHeader: + label: Label + + def __repr__(self): + return f"DATA {self.label.label}" + + +# this could be fused with Label, the only difference is if +# it gets looked up from const_map or symbol_map. +class CONSTREF: + def __init__(self, label: str): + assert isinstance(label, str) + self.label = label + + def __repr__(self): + return f"CONSTREF {self.label}" + + def __eq__(self, other): + if not isinstance(other, CONSTREF): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + + +class CONST: + def __init__(self, name: str, value: int): + assert isinstance(name, str) + assert isinstance(value, int) + self.name = name + self.value = value + + def __repr__(self): + return f"CONST {self.name} {self.value}" + + def __eq__(self, other): + if not isinstance(other, CONST): + return False + return self.name == other.name and self.value == other.value + + +class BaseConstOp: + def __init__(self, name: str, op1: str | int, op2: str | int): + assert isinstance(name, str) + assert isinstance(op1, (str, int)) + assert isinstance(op2, (str, int)) + self.name = name + self.op1 = op1 + self.op2 = op2 + + def __eq__(self, other): + if not isinstance(other, type(self)): + return False + return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 + + def _resolve_operand(self, operand: str | int, const_map: dict[CONSTREF, int]) -> int | None: + if isinstance(operand, str): + op_ref = CONSTREF(operand) + if op_ref in const_map: + return const_map[op_ref] + elif isinstance(operand, int): + return operand + return None + + def calculate(self, const_map: dict[CONSTREF, int]) -> int | None: + op1_val = self._resolve_operand(self.op1, const_map) + op2_val = self._resolve_operand(self.op2, const_map) + 
+ if op1_val is not None and op2_val is not None: + return self._apply_operation(op1_val, op2_val) + return None + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + raise NotImplementedError("Subclasses must implement _apply_operation") + + +class CONST_ADD(BaseConstOp): + def __repr__(self): + return f"CONST_ADD {self.name} {self.op1} {self.op2}" + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + return op1_val + op2_val + + +class CONST_MAX(BaseConstOp): + def __repr__(self): + return f"CONST_MAX {self.name} {self.op1} {self.op2}" + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + return max(op1_val, op2_val) + + +class PUSHLABEL: + def __init__(self, label: Label): + assert isinstance(label, Label), label + self.label = label + + def __repr__(self): + return f"PUSHLABEL {self.label.label}" + + def __eq__(self, other): + if not isinstance(other, PUSHLABEL): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + + +# push the result of an addition (which might be resolvable at compile-time) +class PUSH_OFST: + def __init__(self, label: Label | CONSTREF, ofst: int): + # label can be Label or CONSTREF + assert isinstance(label, (Label, CONSTREF)) + self.label = label + self.ofst = ofst + + def __repr__(self): + label = self.label + if isinstance(label, Label): + label = label.label # str + return f"PUSH_OFST({label}, {self.ofst})" + + def __eq__(self, other): + if not isinstance(other, PUSH_OFST): + return False + return self.label == other.label and self.ofst == other.ofst + + def __hash__(self): + return hash((self.label, self.ofst)) + + +class DATA_ITEM: + def __init__(self, item: bytes | Label): + self.data = item + + def __repr__(self): + if isinstance(self.data, bytes): + return f"DATABYTES {self.data.hex()}" + elif isinstance(self.data, Label): + return f"DATALABEL {self.data.label}" + +# a string (assembly instruction) but with additional metadata from the 
# a string (assembly instruction) but with additional metadata from the source code
class TaggedInstruction(str):
    """An instruction mnemonic carrying ``ast_source``, ``error_msg`` and ``pc_debugger``."""

    def __new__(cls, sstr, *args, **kwargs):
        return super().__new__(cls, sstr)

    def __init__(self, sstr, ast_source=None, error_msg=None):
        self.error_msg = error_msg
        self.pc_debugger = False

        self.ast_source = ast_source


def PUSH(x):
    """Return ``["PUSHn", b0, b1, ...]`` using the fewest bytes that hold ``x``."""
    bs = num_to_bytearray(x)
    # starting in shanghai, can do push0 directly with no immediates
    if len(bs) == 0 and not version_check(begin="shanghai"):
        bs = [0]
    return [f"PUSH{len(bs)}"] + bs


# push an exact number of bytes
def PUSH_N(x, n):
    """Return a PUSH of exactly ``n`` big-endian bytes; asserts that ``x`` fits."""
    o = []
    for _i in range(n):
        o.insert(0, x % 256)
        x //= 256
    assert x == 0
    return [f"PUSH{len(o)}"] + o


def JUMP(label: Label):
    """Return the two-item sequence that jumps unconditionally to ``label``."""
    return [PUSHLABEL(label), "JUMP"]


def JUMPI(label: Label):
    """Return the two-item sequence that jumps conditionally to ``label``."""
    return [PUSHLABEL(label), "JUMPI"]


def mkdebug(pc_debugger, ast_source):
    # compile debug instructions
    # (this is dead code -- CMC 2025-05-08)
    i = TaggedInstruction("DEBUG", ast_source)
    i.pc_debugger = pc_debugger
    return [i]


def is_symbol(i):
    """Return True if ``i`` is a Label."""
    return isinstance(i, Label)


def is_ofst(assembly_item):
    """Return True if ``assembly_item`` is a PUSH_OFST."""
    return isinstance(assembly_item, PUSH_OFST)


# BaseConstOp covers CONST_ADD / CONST_MAX, which _resolve_constants
# expects to find in the assembly list alongside CONST.
AssemblyInstruction = (
    str
    | TaggedInstruction
    | int
    | PUSHLABEL
    | Label
    | PUSH_OFST
    | DATA_ITEM
    | DataHeader
    | CONST
    | BaseConstOp
)

T = TypeVar("T")


def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int):
    """Record ``item -> value``; raise CompilerPanic if ``item`` is already mapped."""
    if item in symbol_map:  # pragma: nocover
        raise CompilerPanic(f"duplicate label: {item}")
    symbol_map[item] = value


def _resolve_constants(
    assembly: list[AssemblyInstruction], const_map: dict[CONSTREF, int]
) -> dict[CONSTREF, int]:
    """
    Fill ``const_map`` (in place) from the constant declarations in ``assembly``.

    Literal CONST declarations are recorded first. Computed constants are
    then evaluated repeatedly until a full pass resolves nothing new, so a
    computed constant may refer to one declared later in the list.
    Constants whose operands never become resolvable are left out.
    """
    for item in assembly:
        if isinstance(item, CONST):
            _add_to_symbol_map(const_map, CONSTREF(item.name), item.value)

    while True:
        changed = False
        for item in assembly:
            if isinstance(item, BaseConstOp):
                # Skip if this constant is already resolved
                if CONSTREF(item.name) in const_map:
                    continue

                # Calculate the value if possible
                if (value := item.calculate(const_map)) is not None:
                    _add_to_symbol_map(const_map, CONSTREF(item.name), value)
                    changed = True

        if not changed:
            break

    return const_map


def resolve_symbols(
    assembly: list[AssemblyInstruction],
) -> tuple[dict[Label, int], dict[CONSTREF, int], dict[str, Any]]:
    """
    Construct symbol map from assembly list

    Returns:
        symbol_map: dict from labels to values
        const_map: dict from CONSTREFs to values
        source_map: source map dict that gets output for the user
    """
    source_map: dict[str, Any] = {
        "breakpoints": OrderedSet(),
        "pc_breakpoints": OrderedSet(),
        "pc_jump_map": {0: "-"},
        "pc_raw_ast_map": {},
        "error_map": {},
    }

    symbol_map: dict[Label, int] = {}
    const_map: dict[CONSTREF, int] = {}

    pc: int = 0

    # (leftover debugging print() calls that dumped the whole assembly and
    # the const map to stdout used to live here; they have been removed)
    const_map = _resolve_constants(assembly, const_map)

    # resolve labels (i.e. JUMPDEST locations) to actual code locations,
    # and simultaneously build the source map.
    for i, item in enumerate(assembly):
        # add it to the source map
        note_line_num(source_map, pc, item)

        # update pc_jump_map
        if item == "JUMP":
            last = assembly[i - 1]
            if isinstance(last, PUSHLABEL) and last.label.label.startswith("internal"):
                if last.label.label.endswith("cleanup"):
                    # exit an internal function
                    source_map["pc_jump_map"][pc] = "o"
                else:
                    # enter an internal function
                    source_map["pc_jump_map"][pc] = "i"
            else:
                # everything else
                source_map["pc_jump_map"][pc] = "-"
        elif item in ("JUMPI", "JUMPDEST"):
            source_map["pc_jump_map"][pc] = "-"

        if item == "DEBUG":
            continue  # "debug" opcode does not go into bytecode

        if isinstance(item, (CONST, BaseConstOp)):
            # constant declarations (literal or computed) do not go into
            # bytecode; without this, CONST_ADD / CONST_MAX items reached the
            # opcode assertion at the bottom of this loop
            continue

        # update pc
        if isinstance(item, Label):
            _add_to_symbol_map(symbol_map, item, pc)
            pc += 1  # jumpdest

        elif isinstance(item, DataHeader):
            # Don't increment pc as the symbol itself doesn't go into code
            _add_to_symbol_map(symbol_map, item.label, pc)

        elif isinstance(item, PUSHLABEL):
            pc += SYMBOL_SIZE + 1  # PUSH2 highbits lowbits

        elif isinstance(item, PUSH_OFST):
            assert isinstance(item.ofst, int), item
            # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar)
            # [PUSH_OFST, _mem_foo, bar] -> PUSHN (foo+bar)
            if isinstance(item.label, Label):
                pc += SYMBOL_SIZE + 1  # PUSH2 highbits lowbits
            elif isinstance(item.label, CONSTREF):
                const = const_map[item.label]
                val = const + item.ofst
                pc += calc_push_size(val)
            else:  # pragma: nocover
                raise CompilerPanic(f"invalid ofst {item.label}")

        elif isinstance(item, DATA_ITEM):
            if isinstance(item.data, Label):
                pc += SYMBOL_SIZE
            else:
                assert isinstance(item.data, bytes)
                pc += len(item.data)
        elif isinstance(item, int):
            assert 0 <= item < 256
            pc += 1
        else:
            assert isinstance(item, str) and item in get_opcodes(), item
            pc += 1

    source_map["breakpoints"] = list(source_map["breakpoints"])
    source_map["pc_breakpoints"] = list(source_map["pc_breakpoints"])

    # magic -- probably the assembler should actually add this label
    _add_to_symbol_map(symbol_map, Label("code_end"), pc)

    return symbol_map, const_map, source_map


# Calculate the size of PUSH instruction
def calc_push_size(val: int):
    # stupid implementation. this is "slow", but its correctness is
    # obvious to verify, as opposed to
    # ```
    # (val.bit_length() + 7) // 8
    #  + (1
    #    if (val > 0 or version_check(begin="shanghai"))
    #    else 0)
    # ```
    return len(PUSH(val))


def note_line_num(line_number_map, pc, item):
    """Record the AST node and error message attached to ``item`` (if any) at ``pc``."""
    # Record AST attached to pc
    if isinstance(item, TaggedInstruction):
        if (ast_node := item.ast_source) is not None:
            ast_node = ast_node.get_original_node()
            if hasattr(ast_node, "node_id"):
                line_number_map["pc_raw_ast_map"][pc] = ast_node

        if item.error_msg is not None:
            line_number_map["error_map"][pc] = item.error_msg

    note_breakpoint(line_number_map, pc, item)


def note_breakpoint(line_number_map, pc, item):
    """Record a breakpoint for a "DEBUG" instruction; no-op for anything else."""
    # Record line number attached to pc
    if item == "DEBUG":
        # Is PC debugger, create PC breakpoint.
        if item.pc_debugger:
            line_number_map["pc_breakpoints"].add(pc)
        # Create line number breakpoint.
        else:
            # NOTE(review): TaggedInstruction does not set `lineno` itself;
            # mkdebug is marked as dead code, so this path looks unused --
            # confirm before relying on it.
            line_number_map["breakpoints"].add(item.lineno + 1)


SYMBOL_SIZE = 2  # size of a PUSH instruction for a code symbol


# predict what length of an assembly [data] node will be in bytecode
def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]:
    """Return the byte length of each data section, in order of appearance."""
    ret = []
    for item in assembly:
        if isinstance(item, DataHeader):
            ret.append(0)
            continue
        if len(ret) == 0:
            # haven't yet seen a data header
            continue
        assert isinstance(item, DATA_ITEM)
        if is_symbol(item.data):
            ret[-1] += SYMBOL_SIZE
        elif isinstance(item.data, bytes):
            ret[-1] += len(item.data)
        else:  # pragma: nocover
            raise ValueError(f"invalid data {type(item)} {item}")

    return ret


def _compile_data_item(item: DATA_ITEM, symbol_map: dict[Label, int]) -> bytes:
    """Return the bytes emitted for one data item (labels become SYMBOL_SIZE bytes)."""
    if isinstance(item.data, bytes):
        return item.data
    if isinstance(item.data, Label):
        symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big")
        return symbolbytes

    raise CompilerPanic(f"Invalid data {type(item.data)}, {item.data}")  # pragma: nocover


# helper function
def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes:
    """Encode a ``["PUSHn", b0, b1, ...]`` list (as built by PUSH / PUSH_N) to bytes."""
    push_mnemonic = assembly[0]
    assert isinstance(push_mnemonic, str) and push_mnemonic.startswith("PUSH")
    push_instr = PUSH_OFFSET + int(push_mnemonic[4:])
    ret = [push_instr]

    for item in assembly[1:]:
        assert isinstance(item, int)
        ret.append(item)
    return bytes(ret)


def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[str, Any]]:
    """
    Generate bytecode and source map from assembly

    Returns:
        bytecode: bytestring of the EVM bytecode
        source_map: source map dict that gets output for the user
    """
    # This API might seem a bit strange, but it's backwards compatible
    symbol_map, const_map, source_map = resolve_symbols(assembly)
    bytecode = _assembly_to_evm(assembly, symbol_map, const_map)
    return bytecode, source_map


def _assembly_to_evm(
    assembly: list[AssemblyInstruction],
    symbol_map: dict[Label, int],
    const_map: dict[CONSTREF, int],
) -> bytes:
    """
    Assembles assembly into EVM bytecode

    Parameters:
        assembly: list of asm instructions
        symbol_map: dict from labels to resolved locations in the code
        const_map: dict from constrefs to their values

    Returns: bytes representing the bytecode
    """
    ret = bytearray()
    opcodes = get_opcodes()

    # now that all symbols have been resolved, generate bytecode
    # using the symbol map
    for item in assembly:
        if item in ("DEBUG",):
            continue  # skippable opcodes
        elif isinstance(item, (CONST, BaseConstOp)):
            # constant declarations (literal or computed) do not show up in
            # bytecode; mirrors the skip in resolve_symbols
            continue
        elif isinstance(item, DataHeader):
            continue  # DataHeader does not show up in bytecode

        elif isinstance(item, PUSHLABEL):
            # push a symbol to stack
            label = item.label
            bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE))
            ret.extend(bytecode)

        elif isinstance(item, Label):
            jumpdest_opcode = opcodes["JUMPDEST"][0]
            assert jumpdest_opcode is not None  # help mypy
            ret.append(jumpdest_opcode)

        elif isinstance(item, PUSH_OFST):
            # PUSH_OFST (LABEL foo) 32
            # PUSH_OFST (const foo) 32
            if isinstance(item.label, Label):
                ofst = symbol_map[item.label] + item.ofst
                bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE))
            else:
                assert isinstance(item.label, CONSTREF)
                ofst = const_map[item.label] + item.ofst
                bytecode = _compile_push_instruction(PUSH(ofst))

            ret.extend(bytecode)

        elif isinstance(item, int):
            ret.append(item)
        elif isinstance(item, str) and item.upper() in opcodes:
            opcode = opcodes[item.upper()][0]
            # TODO: fix signature of get_opcodes()
            assert opcode is not None  # help mypy
            ret.append(opcode)
        elif isinstance(item, DATA_ITEM):
            ret.extend(_compile_data_item(item, symbol_map))
        # the remaining branches slice the mnemonic, so only strings may
        # reach them; anything else falls through to the ValueError below
        elif isinstance(item, str) and item[:4] == "PUSH":
            ret.append(PUSH_OFFSET + int(item[4:]))
        elif isinstance(item, str) and item[:3] == "DUP":
            ret.append(DUP_OFFSET + int(item[3:]))
        elif isinstance(item, str) and item[:4] == "SWAP":
            ret.append(SWAP_OFFSET + int(item[4:]))
        else:  # pragma: no cover
            # unreachable
            raise ValueError(f"Weird symbol in assembly: {type(item)} {item}")

    return bytes(ret)
PUSH_OFST): + # PUSH_OFST (LABEL foo) 32 + # PUSH_OFST (const foo) 32 + if isinstance(item.label, Label): + ofst = symbol_map[item.label] + item.ofst + bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) + else: + assert isinstance(item.label, CONSTREF) + ofst = const_map[item.label] + item.ofst + bytecode = _compile_push_instruction(PUSH(ofst)) + + ret.extend(bytecode) + + elif isinstance(item, int): + ret.append(item) + elif isinstance(item, str) and item.upper() in get_opcodes(): + opcode = get_opcodes()[item.upper()][0] + # TODO: fix signature of get_opcodes() + assert opcode is not None # help mypy + ret.append(opcode) + elif isinstance(item, DATA_ITEM): + ret.extend(_compile_data_item(item, symbol_map)) + elif item[:4] == "PUSH": + ret.append(PUSH_OFFSET + int(item[4:])) + elif item[:3] == "DUP": + ret.append(DUP_OFFSET + int(item[3:])) + elif item[:4] == "SWAP": + ret.append(SWAP_OFFSET + int(item[4:])) + else: # pragma: no cover + # unreachable + raise ValueError(f"Weird symbol in assembly: {type(item)} {item}") + + return bytes(ret) diff --git a/vyper/evm/optimizer.py b/vyper/evm/optimizer.py new file mode 100644 index 0000000000..99fc29850c --- /dev/null +++ b/vyper/evm/optimizer.py @@ -0,0 +1,249 @@ + +from vyper.evm.assembler import DATA_ITEM, PUSHLABEL, DataHeader, Label, is_symbol +from vyper.exceptions import CompilerPanic +from vyper.ir.optimizer import COMMUTATIVE_OPS + +_TERMINAL_OPS = ("JUMP", "RETURN", "REVERT", "STOP", "INVALID") + + +def _prune_unreachable_code(assembly): + # delete code between terminal ops and JUMPDESTS as those are + # unreachable + changed = False + i = 0 + while i < len(assembly) - 1: + if assembly[i] in _TERMINAL_OPS: + # find the next jumpdest or data section + for j in range(i + 1, len(assembly)): + next_is_reachable = isinstance(assembly[j], (Label, DataHeader)) + if next_is_reachable: + break + else: + # fixup an off-by-one if we made it to the end of the assembly + # without finding an jumpdest or 
sublist + j = len(assembly) + changed = j > i + 1 + del assembly[i + 1 : j] + + i += 1 + + return changed + + +def _prune_inefficient_jumps(assembly): + # prune sequences `PUSHLABEL x JUMP LABEL x` to `LABEL x` + changed = False + i = 0 + while i < len(assembly) - 2: + if ( + isinstance(assembly[i], PUSHLABEL) + and assembly[i + 1] == "JUMP" + and is_symbol(assembly[i + 2]) + and assembly[i + 2] == assembly[i].label + ): + # delete PUSHLABEL x JUMP + changed = True + del assembly[i : i + 2] + else: + i += 1 + + return changed + + +def _optimize_inefficient_jumps(assembly): + # optimize sequences + # `PUSHLABEL common JUMPI PUSHLABEL x JUMP LABEL common` + # to `ISZERO PUSHLABEL x JUMPI LABEL common` + changed = False + i = 0 + while i < len(assembly) - 4: + if ( + isinstance(assembly[i], PUSHLABEL) + and assembly[i + 1] == "JUMPI" + and isinstance(assembly[i + 2], PUSHLABEL) + and assembly[i + 3] == "JUMP" + and isinstance(assembly[i + 4], Label) + and assembly[i].label == assembly[i + 4] + ): + changed = True + assembly[i] = "ISZERO" + assembly[i + 1] = assembly[i + 2] + assembly[i + 2] = "JUMPI" + del assembly[i + 3 : i + 4] + else: + i += 1 + + return changed + + +def _merge_jumpdests(assembly): + # When we have multiple JUMPDESTs in a row, or when a JUMPDEST + # is immediately followed by another JUMP, we can skip the + # intermediate jumps. + # (Usually a chain of JUMPs is created by a nested block, + # or some nested if statements.) 
+ changed = False + i = 0 + while i < len(assembly) - 2: + # if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": + if is_symbol(assembly[i]): + current_symbol = assembly[i] + if is_symbol(assembly[i + 1]): + # LABEL x LABEL y + # replace all instances of PUSHLABEL x with PUSHLABEL y + new_symbol = assembly[i + 1] + if new_symbol != current_symbol: + for j in range(len(assembly)): + if ( + isinstance(assembly[j], PUSHLABEL) + and assembly[j].label == current_symbol + ): + assembly[j].label = new_symbol + changed = True + elif isinstance(assembly[i + 1], PUSHLABEL) and assembly[i + 2] == "JUMP": + # LABEL x PUSHLABEL y JUMP + # replace all instances of PUSHLABEL x with PUSHLABEL y + new_symbol = assembly[i + 1].label + for j in range(len(assembly)): + if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: + assembly[j].label = new_symbol + changed = True + + i += 1 + + return changed + + +_RETURNS_ZERO_OR_ONE = { + "LT", + "GT", + "SLT", + "SGT", + "EQ", + "ISZERO", + "CALL", + "STATICCALL", + "CALLCODE", + "DELEGATECALL", +} + + +def _merge_iszero(assembly): + changed = False + + i = 0 + # list of opcodes that return 0 or 1 + while i < len(assembly) - 2: + if ( + isinstance(assembly[i], str) + and assembly[i] in _RETURNS_ZERO_OR_ONE + and assembly[i + 1 : i + 3] == ["ISZERO", "ISZERO"] + ): + changed = True + # drop the extra iszeros + del assembly[i + 1 : i + 3] + else: + i += 1 + i = 0 + while i < len(assembly) - 3: + # ISZERO ISZERO could map truthy to 1, + # but it could also just be a no-op before JUMPI. 
+ if ( + assembly[i : i + 2] == ["ISZERO", "ISZERO"] + and isinstance(assembly[i + 2], PUSHLABEL) + and assembly[i + 3] == "JUMPI" + ): + changed = True + del assembly[i : i + 2] + else: + i += 1 + + return changed + + +def _prune_unused_jumpdests(assembly): + changed = False + + used_jumpdests: set[Label] = set() + + # find all used jumpdests + for item in assembly: + if isinstance(item, PUSHLABEL): + used_jumpdests.add(item.label) + + if isinstance(item, DATA_ITEM) and isinstance(item.data, Label): + # add symbols used in data sections as they are likely + # used for a jumptable. + used_jumpdests.add(item.data) + + # delete jumpdests that aren't used + i = 0 + while i < len(assembly): + if is_symbol(assembly[i]) and assembly[i] not in used_jumpdests: + changed = True + del assembly[i] + else: + i += 1 + + return changed + + +def _stack_peephole_opts(assembly): + changed = False + i = 0 + while i < len(assembly) - 2: + if assembly[i : i + 3] == ["DUP1", "SWAP2", "SWAP1"]: + changed = True + del assembly[i + 2] + assembly[i] = "SWAP1" + assembly[i + 1] = "DUP2" + continue + # usually generated by with statements that return their input like + # (with x (...x)) + if assembly[i : i + 3] == ["DUP1", "SWAP1", "POP"]: + # DUP1 SWAP1 POP == no-op + changed = True + del assembly[i : i + 3] + continue + # usually generated by nested with statements that don't return like + # (with x (with y ...)) + if assembly[i : i + 3] == ["SWAP1", "POP", "POP"]: + # SWAP1 POP POP == POP POP + changed = True + del assembly[i] + continue + if ( + isinstance(assembly[i], str) + and assembly[i].startswith("SWAP") + and assembly[i] == assembly[i + 1] + ): + changed = True + del assembly[i : i + 2] + if assembly[i] == "SWAP1" and str(assembly[i + 1]).lower() in COMMUTATIVE_OPS: + changed = True + del assembly[i] + if assembly[i] == "DUP1" and assembly[i + 1] == "SWAP1": + changed = True + del assembly[i + 1] + i += 1 + + return changed + + +# optimize assembly, in place +def 
optimize_assembly(assembly): + for _ in range(1024): + changed = False + + changed |= _prune_unreachable_code(assembly) + changed |= _merge_iszero(assembly) + changed |= _merge_jumpdests(assembly) + changed |= _prune_inefficient_jumps(assembly) + changed |= _optimize_inefficient_jumps(assembly) + changed |= _prune_unused_jumpdests(assembly) + changed |= _stack_peephole_opts(assembly) + + if not changed: + return + + raise CompilerPanic("infinite loop detected during assembly reduction") # pragma: nocover diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 9aa6906aef..582c539698 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -10,233 +10,13 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel from vyper.evm.opcodes import get_opcodes, version_check +from vyper.evm.optimizer import optimize_assembly from vyper.exceptions import CodegenPanic, CompilerPanic +from vyper.evm.assembler import CONST, CONSTREF, DATA_ITEM, JUMP, JUMPI, PUSH, PUSH_OFST, PUSHLABEL, AssemblyInstruction, DataHeader, Label, TaggedInstruction, assembly_to_evm, get_data_segment_lengths, is_symbol, mkdebug from vyper.ir.optimizer import COMMUTATIVE_OPS from vyper.utils import MemoryPositions, OrderedSet from vyper.version import version_tuple -PUSH_OFFSET = 0x5F -DUP_OFFSET = 0x7F -SWAP_OFFSET = 0x8F - - -def num_to_bytearray(x): - o = [] - while x > 0: - o.insert(0, x % 256) - x //= 256 - return o - - -def PUSH(x): - bs = num_to_bytearray(x) - # starting in shanghai, can do push0 directly with no immediates - if len(bs) == 0 and not version_check(begin="shanghai"): - bs = [0] - return [f"PUSH{len(bs)}"] + bs - - -# push an exact number of bytes -def PUSH_N(x, n): - o = [] - for _i in range(n): - o.insert(0, x % 256) - x //= 256 - assert x == 0 - return [f"PUSH{len(o)}"] + o - - -##################################### -# assembly data structures and utils -##################################### - - -class Label: - def 
__init__(self, label: str): - assert isinstance(label, str) - self.label = label - - def __repr__(self): - return f"LABEL {self.label}" - - def __eq__(self, other): - if not isinstance(other, Label): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - - -@dataclass -class DataHeader: - label: Label - - def __repr__(self): - return f"DATA {self.label.label}" - - -# this could be fused with Label, the only difference is if -# it gets looked up from const_map or symbol_map. -class CONSTREF: - def __init__(self, label: str): - assert isinstance(label, str) - self.label = label - - def __repr__(self): - return f"CONSTREF {self.label}" - - def __eq__(self, other): - if not isinstance(other, CONSTREF): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - - -class CONST: - def __init__(self, name: str, value: int): - assert isinstance(name, str) - assert isinstance(value, int) - self.name = name - self.value = value - - def __repr__(self): - return f"CONST {self.name} {self.value}" - - def __eq__(self, other): - if not isinstance(other, CONST): - return False - return self.name == other.name and self.value == other.value - - -class BaseConstOp: - def __init__(self, name: str, op1: str | int, op2: str | int): - assert isinstance(name, str) - assert isinstance(op1, (str, int)) - assert isinstance(op2, (str, int)) - self.name = name - self.op1 = op1 - self.op2 = op2 - - def __eq__(self, other): - if not isinstance(other, type(self)): - return False - return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 - - def _resolve_operand(self, operand: str | int, const_map: dict[CONSTREF, int]) -> int | None: - if isinstance(operand, str): - op_ref = CONSTREF(operand) - if op_ref in const_map: - return const_map[op_ref] - elif isinstance(operand, int): - return operand - return None - - def calculate(self, const_map: dict[CONSTREF, int]) -> int | None: - 
op1_val = self._resolve_operand(self.op1, const_map) - op2_val = self._resolve_operand(self.op2, const_map) - - if op1_val is not None and op2_val is not None: - return self._apply_operation(op1_val, op2_val) - return None - - def _apply_operation(self, op1_val: int, op2_val: int) -> int: - raise NotImplementedError("Subclasses must implement _apply_operation") - - -class CONST_ADD(BaseConstOp): - def __repr__(self): - return f"CONST_ADD {self.name} {self.op1} {self.op2}" - - def _apply_operation(self, op1_val: int, op2_val: int) -> int: - return op1_val + op2_val - - -class CONST_MAX(BaseConstOp): - def __repr__(self): - return f"CONST_MAX {self.name} {self.op1} {self.op2}" - - def _apply_operation(self, op1_val: int, op2_val: int) -> int: - return max(op1_val, op2_val) - - -class PUSHLABEL: - def __init__(self, label: Label): - assert isinstance(label, Label), label - self.label = label - - def __repr__(self): - return f"PUSHLABEL {self.label.label}" - - def __eq__(self, other): - if not isinstance(other, PUSHLABEL): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - - -# push the result of an addition (which might be resolvable at compile-time) -class PUSH_OFST: - def __init__(self, label: Label | CONSTREF, ofst: int): - # label can be Label or CONSTREF - assert isinstance(label, (Label, CONSTREF)) - self.label = label - self.ofst = ofst - - def __repr__(self): - label = self.label - if isinstance(label, Label): - label = label.label # str - return f"PUSH_OFST({label}, {self.ofst})" - - def __eq__(self, other): - if not isinstance(other, PUSH_OFST): - return False - return self.label == other.label and self.ofst == other.ofst - - def __hash__(self): - return hash((self.label, self.ofst)) - - -class DATA_ITEM: - def __init__(self, item: bytes | Label): - self.data = item - - def __repr__(self): - if isinstance(self.data, bytes): - return f"DATABYTES {self.data.hex()}" - elif isinstance(self.data, Label): - return 
f"DATALABEL {self.data.label}" - - -def JUMP(label: Label): - return [PUSHLABEL(label), "JUMP"] - - -def JUMPI(label: Label): - return [PUSHLABEL(label), "JUMPI"] - - -def mkdebug(pc_debugger, ast_source): - # compile debug instructions - # (this is dead code -- CMC 2025-05-08) - i = TaggedInstruction("DEBUG", ast_source) - i.pc_debugger = pc_debugger - return [i] - - -def is_symbol(i): - return isinstance(i, Label) - - -def is_ofst(assembly_item): - return isinstance(assembly_item, PUSH_OFST) - - def generate_cbor_metadata( compiler_metadata: Any, runtime_codesize: int, @@ -281,17 +61,6 @@ def _runtime_code_offsets(ctor_mem_size, runtime_codelen): return runtime_code_start, runtime_code_end -# Calculate the size of PUSH instruction -def calc_push_size(val: int): - # stupid implementation. this is "slow", but its correctness is - # obvious verify, as opposed to - # ``` - # (val.bit_length() + 7) // 8 - # + (1 - # if (val > 0 or version_check(begin="shanghai")) - # else 0) - # ``` - return len(PUSH(val)) # temporary optimization to handle stack items for return sequences @@ -332,18 +101,6 @@ def _rewrite_return_sequences(ir_node, label_params=None): _rewrite_return_sequences(t, label_params) -# a string (assembly instruction) but with additional metadata from the source code -class TaggedInstruction(str): - def __new__(cls, sstr, *args, **kwargs): - return super().__new__(cls, sstr) - - def __init__(self, sstr, ast_source=None, error_msg=None): - self.error_msg = error_msg - self.pc_debugger = False - - self.ast_source = ast_source - - ############################## # IRnode to assembly ############################## @@ -376,14 +133,6 @@ def compile_to_assembly( optimize_assembly(res) return res - -# TODO: move all these assembly data structures to own module, like -# vyper.evm.assembly -AssemblyInstruction = ( - str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader | CONST -) - - class _IRnodeLowerer: # map from variable names to 
height in stack withargs: dict[str, int] @@ -1012,559 +761,7 @@ def _assert_false(self): return JUMPI(self.global_revert_label) -############################## -# assembly to evm utils -############################## - - -def note_line_num(line_number_map, pc, item): - # Record AST attached to pc - if isinstance(item, TaggedInstruction): - if (ast_node := item.ast_source) is not None: - ast_node = ast_node.get_original_node() - if hasattr(ast_node, "node_id"): - line_number_map["pc_raw_ast_map"][pc] = ast_node - - if item.error_msg is not None: - line_number_map["error_map"][pc] = item.error_msg - - note_breakpoint(line_number_map, pc, item) - - -def note_breakpoint(line_number_map, pc, item): - # Record line number attached to pc - if item == "DEBUG": - # Is PC debugger, create PC breakpoint. - if item.pc_debugger: - line_number_map["pc_breakpoints"].add(pc) - # Create line number breakpoint. - else: - line_number_map["breakpoints"].add(item.lineno + 1) - - -############################## -# assembly optimizer -############################## - -_TERMINAL_OPS = ("JUMP", "RETURN", "REVERT", "STOP", "INVALID") - - -def _prune_unreachable_code(assembly): - # delete code between terminal ops and JUMPDESTS as those are - # unreachable - changed = False - i = 0 - while i < len(assembly) - 1: - if assembly[i] in _TERMINAL_OPS: - # find the next jumpdest or data section - for j in range(i + 1, len(assembly)): - next_is_reachable = isinstance(assembly[j], (Label, DataHeader)) - if next_is_reachable: - break - else: - # fixup an off-by-one if we made it to the end of the assembly - # without finding an jumpdest or sublist - j = len(assembly) - changed = j > i + 1 - del assembly[i + 1 : j] - - i += 1 - - return changed - - -def _prune_inefficient_jumps(assembly): - # prune sequences `PUSHLABEL x JUMP LABEL x` to `LABEL x` - changed = False - i = 0 - while i < len(assembly) - 2: - if ( - isinstance(assembly[i], PUSHLABEL) - and assembly[i + 1] == "JUMP" - and 
is_symbol(assembly[i + 2]) - and assembly[i + 2] == assembly[i].label - ): - # delete PUSHLABEL x JUMP - changed = True - del assembly[i : i + 2] - else: - i += 1 - - return changed - - -def _optimize_inefficient_jumps(assembly): - # optimize sequences - # `PUSHLABEL common JUMPI PUSHLABEL x JUMP LABEL common` - # to `ISZERO PUSHLABEL x JUMPI LABEL common` - changed = False - i = 0 - while i < len(assembly) - 4: - if ( - isinstance(assembly[i], PUSHLABEL) - and assembly[i + 1] == "JUMPI" - and isinstance(assembly[i + 2], PUSHLABEL) - and assembly[i + 3] == "JUMP" - and isinstance(assembly[i + 4], Label) - and assembly[i].label == assembly[i + 4] - ): - changed = True - assembly[i] = "ISZERO" - assembly[i + 1] = assembly[i + 2] - assembly[i + 2] = "JUMPI" - del assembly[i + 3 : i + 4] - else: - i += 1 - - return changed - - -def _merge_jumpdests(assembly): - # When we have multiple JUMPDESTs in a row, or when a JUMPDEST - # is immediately followed by another JUMP, we can skip the - # intermediate jumps. - # (Usually a chain of JUMPs is created by a nested block, - # or some nested if statements.) 
- changed = False - i = 0 - while i < len(assembly) - 2: - # if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": - if is_symbol(assembly[i]): - current_symbol = assembly[i] - if is_symbol(assembly[i + 1]): - # LABEL x LABEL y - # replace all instances of PUSHLABEL x with PUSHLABEL y - new_symbol = assembly[i + 1] - if new_symbol != current_symbol: - for j in range(len(assembly)): - if ( - isinstance(assembly[j], PUSHLABEL) - and assembly[j].label == current_symbol - ): - assembly[j].label = new_symbol - changed = True - elif isinstance(assembly[i + 1], PUSHLABEL) and assembly[i + 2] == "JUMP": - # LABEL x PUSHLABEL y JUMP - # replace all instances of PUSHLABEL x with PUSHLABEL y - new_symbol = assembly[i + 1].label - for j in range(len(assembly)): - if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: - assembly[j].label = new_symbol - changed = True - - i += 1 - - return changed - - -_RETURNS_ZERO_OR_ONE = { - "LT", - "GT", - "SLT", - "SGT", - "EQ", - "ISZERO", - "CALL", - "STATICCALL", - "CALLCODE", - "DELEGATECALL", -} - - -def _merge_iszero(assembly): - changed = False - - i = 0 - # list of opcodes that return 0 or 1 - while i < len(assembly) - 2: - if ( - isinstance(assembly[i], str) - and assembly[i] in _RETURNS_ZERO_OR_ONE - and assembly[i + 1 : i + 3] == ["ISZERO", "ISZERO"] - ): - changed = True - # drop the extra iszeros - del assembly[i + 1 : i + 3] - else: - i += 1 - i = 0 - while i < len(assembly) - 3: - # ISZERO ISZERO could map truthy to 1, - # but it could also just be a no-op before JUMPI. 
- if ( - assembly[i : i + 2] == ["ISZERO", "ISZERO"] - and isinstance(assembly[i + 2], PUSHLABEL) - and assembly[i + 3] == "JUMPI" - ): - changed = True - del assembly[i : i + 2] - else: - i += 1 - - return changed - - -def _prune_unused_jumpdests(assembly): - changed = False - - used_jumpdests: set[Label] = set() - - # find all used jumpdests - for item in assembly: - if isinstance(item, PUSHLABEL): - used_jumpdests.add(item.label) - - if isinstance(item, DATA_ITEM) and isinstance(item.data, Label): - # add symbols used in data sections as they are likely - # used for a jumptable. - used_jumpdests.add(item.data) - - # delete jumpdests that aren't used - i = 0 - while i < len(assembly): - if is_symbol(assembly[i]) and assembly[i] not in used_jumpdests: - changed = True - del assembly[i] - else: - i += 1 - - return changed - - -def _stack_peephole_opts(assembly): - changed = False - i = 0 - while i < len(assembly) - 2: - if assembly[i : i + 3] == ["DUP1", "SWAP2", "SWAP1"]: - changed = True - del assembly[i + 2] - assembly[i] = "SWAP1" - assembly[i + 1] = "DUP2" - continue - # usually generated by with statements that return their input like - # (with x (...x)) - if assembly[i : i + 3] == ["DUP1", "SWAP1", "POP"]: - # DUP1 SWAP1 POP == no-op - changed = True - del assembly[i : i + 3] - continue - # usually generated by nested with statements that don't return like - # (with x (with y ...)) - if assembly[i : i + 3] == ["SWAP1", "POP", "POP"]: - # SWAP1 POP POP == POP POP - changed = True - del assembly[i] - continue - if ( - isinstance(assembly[i], str) - and assembly[i].startswith("SWAP") - and assembly[i] == assembly[i + 1] - ): - changed = True - del assembly[i : i + 2] - if assembly[i] == "SWAP1" and str(assembly[i + 1]).lower() in COMMUTATIVE_OPS: - changed = True - del assembly[i] - if assembly[i] == "DUP1" and assembly[i + 1] == "SWAP1": - changed = True - del assembly[i + 1] - i += 1 - - return changed - - -# optimize assembly, in place -def 
optimize_assembly(assembly): - for _ in range(1024): - changed = False - - changed |= _prune_unreachable_code(assembly) - changed |= _merge_iszero(assembly) - changed |= _merge_jumpdests(assembly) - changed |= _prune_inefficient_jumps(assembly) - changed |= _optimize_inefficient_jumps(assembly) - changed |= _prune_unused_jumpdests(assembly) - changed |= _stack_peephole_opts(assembly) - - if not changed: - return - - raise CompilerPanic("infinite loop detected during assembly reduction") # pragma: nocover - - -SYMBOL_SIZE = 2 # size of a PUSH instruction for a code symbol - - -# predict what length of an assembly [data] node will be in bytecode -def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: - ret = [] - for item in assembly: - if isinstance(item, DataHeader): - ret.append(0) - continue - if len(ret) == 0: - # haven't yet seen a data header - continue - assert isinstance(item, DATA_ITEM) - if is_symbol(item.data): - ret[-1] += SYMBOL_SIZE - elif isinstance(item.data, bytes): - ret[-1] += len(item.data) - else: # pragma: nocover - raise ValueError(f"invalid data {type(item)} {item}") - - return ret - - -############################## -# assembly to evm bytecode -############################## - - -def _compile_data_item(item: DATA_ITEM, symbol_map: dict[Label, int]) -> bytes: - if isinstance(item.data, bytes): - return item.data - if isinstance(item.data, Label): - symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") - return symbolbytes - - raise CompilerPanic(f"Invalid data {type(item.data)}, {item.data}") # pragma: nocover - - -T = TypeVar("T") -def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int): - if item in symbol_map: # pragma: nocover - raise CompilerPanic(f"duplicate label: {item}") - symbol_map[item] = value -def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[str, Any]]: - """ - Generate bytecode and source map from assembly - - Returns: - bytecode: bytestring of the 
EVM bytecode - source_map: source map dict that gets output for the user - """ - # This API might seem a bit strange, but it's backwards compatible - symbol_map, const_map, source_map = resolve_symbols(assembly) - bytecode = _assembly_to_evm(assembly, symbol_map, const_map) - return bytecode, source_map - - -def _resolve_constants( - assembly: list[AssemblyInstruction], const_map: dict[CONSTREF, int] -) -> dict[CONSTREF, int]: - for item in assembly: - if isinstance(item, CONST): - _add_to_symbol_map(const_map, CONSTREF(item.name), item.value) - - while True: - changed = False - for item in assembly: - if isinstance(item, (CONST_ADD, CONST_MAX)): - # Skip if this constant is already resolved - if CONSTREF(item.name) in const_map: - continue - - # Calculate the value if possible - if (value := item.calculate(const_map)) is not None: - _add_to_symbol_map(const_map, CONSTREF(item.name), value) - changed = True - - if not changed: - break - - return const_map - - -# resolve symbols in assembly -def resolve_symbols( - assembly: list[AssemblyInstruction], -) -> tuple[dict[Label, int], dict[CONSTREF, int], dict[str, Any]]: - """ - Construct symbol map from assembly list - - Returns: - symbol_map: dict from labels to values - const_map: dict from CONSTREFs to values - source_map: source map dict that gets output for the user - """ - source_map: dict[str, Any] = { - "breakpoints": OrderedSet(), - "pc_breakpoints": OrderedSet(), - "pc_jump_map": {0: "-"}, - "pc_raw_ast_map": {}, - "error_map": {}, - } - - symbol_map: dict[Label, int] = {} - const_map: dict[CONSTREF, int] = {} - - pc: int = 0 - - const_map = _resolve_constants(assembly, const_map) - print("\n".join(str(item) for item in assembly)) - print(const_map) - - # resolve labels (i.e. JUMPDEST locations) to actual code locations, - # and simultaneously build the source map. 
- for i, item in enumerate(assembly): - # add it to the source map - note_line_num(source_map, pc, item) - - # update pc_jump_map - if item == "JUMP": - last = assembly[i - 1] - if isinstance(last, PUSHLABEL) and last.label.label.startswith("internal"): - if last.label.label.endswith("cleanup"): - # exit an internal function - source_map["pc_jump_map"][pc] = "o" - else: - # enter an internal function - source_map["pc_jump_map"][pc] = "i" - else: - # everything else - source_map["pc_jump_map"][pc] = "-" - elif item in ("JUMPI", "JUMPDEST"): - source_map["pc_jump_map"][pc] = "-" - - if item == "DEBUG": - continue # "debug" opcode does not go into bytecode - - if isinstance(item, CONST): - continue # CONST declarations do not go into bytecode - - # update pc - if isinstance(item, Label): - _add_to_symbol_map(symbol_map, item, pc) - pc += 1 # jumpdest - - elif isinstance(item, DataHeader): - # Don't increment pc as the symbol itself doesn't go into code - _add_to_symbol_map(symbol_map, item.label, pc) - - elif isinstance(item, PUSHLABEL): - pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits - - elif isinstance(item, PUSH_OFST): - assert isinstance(item.ofst, int), item - # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar) - # [PUSH_OFST, _mem_foo, bar] -> PUSHN (foo+bar) - if isinstance(item.label, Label): - pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits - elif isinstance(item.label, CONSTREF): - const = const_map[item.label] - val = const + item.ofst - pc += calc_push_size(val) - else: # pragma: nocover - raise CompilerPanic(f"invalid ofst {item.label}") - - elif isinstance(item, DATA_ITEM): - if isinstance(item.data, Label): - pc += SYMBOL_SIZE - else: - assert isinstance(item.data, bytes) - pc += len(item.data) - elif isinstance(item, int): - assert 0 <= item < 256 - pc += 1 - else: - assert isinstance(item, str) and item in get_opcodes(), item - pc += 1 - - source_map["breakpoints"] = list(source_map["breakpoints"]) - source_map["pc_breakpoints"] = 
list(source_map["pc_breakpoints"]) - - # magic -- probably the assembler should actually add this label - _add_to_symbol_map(symbol_map, Label("code_end"), pc) - - return symbol_map, const_map, source_map - - -# helper function -def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes: - push_mnemonic = assembly[0] - assert isinstance(push_mnemonic, str) and push_mnemonic.startswith("PUSH") - push_instr = PUSH_OFFSET + int(push_mnemonic[4:]) - ret = [push_instr] - - for item in assembly[1:]: - assert isinstance(item, int) - ret.append(item) - return bytes(ret) - - -def _assembly_to_evm( - assembly: list[AssemblyInstruction], - symbol_map: dict[Label, int], - const_map: dict[CONSTREF, int], -) -> bytes: - """ - Assembles assembly into EVM bytecode - - Parameters: - assembly: list of asm instructions - symbol_map: dict from labels to resolved locations in the code - const_map: dict from constrefs to their values - - Returns: bytes representing the bytecode - """ - ret = bytearray() - - # now that all symbols have been resolved, generate bytecode - # using the symbol map - for item in assembly: - if item in ("DEBUG",): - continue # skippable opcodes - elif isinstance(item, CONST): - continue # CONST things do not show up in bytecode - elif isinstance(item, DataHeader): - continue # DataHeader does not show up in bytecode - - elif isinstance(item, PUSHLABEL): - # push a symbol to stack - label = item.label - bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) - ret.extend(bytecode) - - elif isinstance(item, Label): - jumpdest_opcode = get_opcodes()["JUMPDEST"][0] - assert jumpdest_opcode is not None # help mypy - ret.append(jumpdest_opcode) - - elif isinstance(item, PUSH_OFST): - # PUSH_OFST (LABEL foo) 32 - # PUSH_OFST (const foo) 32 - if isinstance(item.label, Label): - ofst = symbol_map[item.label] + item.ofst - bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) - else: - assert isinstance(item.label, 
CONSTREF) - ofst = const_map[item.label] + item.ofst - bytecode = _compile_push_instruction(PUSH(ofst)) - - ret.extend(bytecode) - - elif isinstance(item, int): - ret.append(item) - elif isinstance(item, str) and item.upper() in get_opcodes(): - opcode = get_opcodes()[item.upper()][0] - # TODO: fix signature of get_opcodes() - assert opcode is not None # help mypy - ret.append(opcode) - elif isinstance(item, DATA_ITEM): - ret.extend(_compile_data_item(item, symbol_map)) - elif item[:4] == "PUSH": - ret.append(PUSH_OFFSET + int(item[4:])) - elif item[:3] == "DUP": - ret.append(DUP_OFFSET + int(item[3:])) - elif item[:4] == "SWAP": - ret.append(SWAP_OFFSET + int(item[4:])) - else: # pragma: no cover - # unreachable - raise ValueError(f"Weird symbol in assembly: {type(item)} {item}") - - return bytes(ret) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index b04aba9cb7..f630713e5f 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -7,7 +7,7 @@ from vyper.compiler.settings import OptimizationLevel, Settings from vyper.evm.address_space import MEMORY, STORAGE, TRANSIENT from vyper.exceptions import CompilerPanic -from vyper.ir.compile_ir import AssemblyInstruction +from vyper.evm.assembler import AssemblyInstruction from vyper.venom.analysis import MemSSA from vyper.venom.analysis.analysis import IRAnalysesCache from vyper.venom.basicblock import IRLabel, IRLiteral diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 823f346ce1..6c9a2e431d 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -3,7 +3,7 @@ from typing import Any, Iterable from vyper.exceptions import CompilerPanic, StackTooDeep -from vyper.ir.compile_ir import ( +from vyper.evm.assembler import ( DATA_ITEM, PUSH, PUSH_OFST, @@ -12,8 +12,8 @@ DataHeader, Label, TaggedInstruction, - optimize_assembly, ) +from vyper.evm.optimizer import optimize_assembly from vyper.utils import MemoryPositions, OrderedSet, 
wrap256 from vyper.venom.analysis import CFGAnalysis, DFGAnalysis, IRAnalysesCache, LivenessAnalysis from vyper.venom.basicblock import ( From 619449066f72ea9d8a24afeff7fe3328b4351dd8 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 15:15:33 +0300 Subject: [PATCH 071/172] lint --- tests/unit/compiler/ir/test_compile_ir.py | 5 ++-- vyper/evm/assembler.py | 7 +++-- vyper/evm/optimizer.py | 1 - vyper/ir/compile_ir.py | 36 ++++++++++++++--------- vyper/venom/__init__.py | 2 +- vyper/venom/venom_to_assembly.py | 2 +- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/tests/unit/compiler/ir/test_compile_ir.py b/tests/unit/compiler/ir/test_compile_ir.py index 223d226272..d028b34b7a 100644 --- a/tests/unit/compiler/ir/test_compile_ir.py +++ b/tests/unit/compiler/ir/test_compile_ir.py @@ -1,9 +1,9 @@ import pytest from vyper.codegen.ir_node import IRnode +from vyper.evm.assembler import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants from vyper.evm.opcodes import version_check from vyper.ir import compile_ir -from vyper.evm.assembler import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants from vyper.ir.s_expressions import parse_s_exp fail_list = [ @@ -84,8 +84,9 @@ def test_const_add(): assert const_map[CONSTREF("c")] == 3 assert const_map[CONSTREF("d")] == 13 + def test_const_max(): asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] const_map = _resolve_constants(asm, {}) assert const_map[CONSTREF("c")] == 2 - assert const_map[CONSTREF("d")] == 10 \ No newline at end of file + assert const_map[CONSTREF("d")] == 10 diff --git a/vyper/evm/assembler.py b/vyper/evm/assembler.py index 1d14258da4..fe8eb761b3 100644 --- a/vyper/evm/assembler.py +++ b/vyper/evm/assembler.py @@ -1,4 +1,3 @@ - ##################################### # assembly data structures and utils ##################################### @@ -185,6 +184,7 @@ def __repr__(self): elif isinstance(self.data, Label): return 
f"DATALABEL {self.data.label}" + # a string (assembly instruction) but with additional metadata from the source code class TaggedInstruction(str): def __new__(cls, sstr, *args, **kwargs): @@ -214,6 +214,7 @@ def PUSH_N(x, n): assert x == 0 return [f"PUSH{len(o)}"] + o + def JUMP(label: Label): return [PUSHLABEL(label), "JUMP"] @@ -379,6 +380,7 @@ def resolve_symbols( return symbol_map, const_map, source_map + # Calculate the size of PUSH instruction def calc_push_size(val: int): # stupid implementation. this is "slow", but its correctness is @@ -441,7 +443,6 @@ def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: return ret - def _compile_data_item(item: DATA_ITEM, symbol_map: dict[Label, int]) -> bytes: if isinstance(item.data, bytes): return item.data @@ -464,6 +465,7 @@ def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes: ret.append(item) return bytes(ret) + def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[str, Any]]: """ Generate bytecode and source map from assembly @@ -477,6 +479,7 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st bytecode = _assembly_to_evm(assembly, symbol_map, const_map) return bytecode, source_map + def _assembly_to_evm( assembly: list[AssemblyInstruction], symbol_map: dict[Label, int], diff --git a/vyper/evm/optimizer.py b/vyper/evm/optimizer.py index 99fc29850c..4ff8520468 100644 --- a/vyper/evm/optimizer.py +++ b/vyper/evm/optimizer.py @@ -1,4 +1,3 @@ - from vyper.evm.assembler import DATA_ITEM, PUSHLABEL, DataHeader, Label, is_symbol from vyper.exceptions import CompilerPanic from vyper.ir.optimizer import COMMUTATIVE_OPS diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 582c539698..e1034c1fa3 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -2,21 +2,36 @@ import contextlib import copy -from dataclasses import dataclass -from typing import Any, Optional, TypeVar +from typing import Any, 
Optional import cbor2 from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel -from vyper.evm.opcodes import get_opcodes, version_check +from vyper.evm.assembler import ( + CONST, + CONSTREF, + DATA_ITEM, + JUMP, + JUMPI, + PUSH, + PUSH_OFST, + PUSHLABEL, + AssemblyInstruction, + DataHeader, + Label, + TaggedInstruction, + assembly_to_evm, + get_data_segment_lengths, + mkdebug, +) +from vyper.evm.opcodes import get_opcodes from vyper.evm.optimizer import optimize_assembly from vyper.exceptions import CodegenPanic, CompilerPanic -from vyper.evm.assembler import CONST, CONSTREF, DATA_ITEM, JUMP, JUMPI, PUSH, PUSH_OFST, PUSHLABEL, AssemblyInstruction, DataHeader, Label, TaggedInstruction, assembly_to_evm, get_data_segment_lengths, is_symbol, mkdebug -from vyper.ir.optimizer import COMMUTATIVE_OPS -from vyper.utils import MemoryPositions, OrderedSet +from vyper.utils import MemoryPositions from vyper.version import version_tuple + def generate_cbor_metadata( compiler_metadata: Any, runtime_codesize: int, @@ -61,8 +76,6 @@ def _runtime_code_offsets(ctor_mem_size, runtime_codelen): return runtime_code_start, runtime_code_end - - # temporary optimization to handle stack items for return sequences # like `return return_ofst return_len`. this is kind of brittle because # it assumes the arguments are already on the stack, to be replaced @@ -133,6 +146,7 @@ def compile_to_assembly( optimize_assembly(res) return res + class _IRnodeLowerer: # map from variable names to height in stack withargs: dict[str, int] @@ -759,9 +773,3 @@ def _assert_false(self): self.global_revert_label = self.mksymbol("revert") # use a shared failure block for common case of assert(x). 
return JUMPI(self.global_revert_label) - - - - - - diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index f630713e5f..0582719146 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -6,8 +6,8 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel, Settings from vyper.evm.address_space import MEMORY, STORAGE, TRANSIENT -from vyper.exceptions import CompilerPanic from vyper.evm.assembler import AssemblyInstruction +from vyper.exceptions import CompilerPanic from vyper.venom.analysis import MemSSA from vyper.venom.analysis.analysis import IRAnalysesCache from vyper.venom.basicblock import IRLabel, IRLiteral diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 6c9a2e431d..c280373d8f 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -2,7 +2,6 @@ from typing import Any, Iterable -from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.evm.assembler import ( DATA_ITEM, PUSH, @@ -14,6 +13,7 @@ TaggedInstruction, ) from vyper.evm.optimizer import optimize_assembly +from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.utils import MemoryPositions, OrderedSet, wrap256 from vyper.venom.analysis import CFGAnalysis, DFGAnalysis, IRAnalysesCache, LivenessAnalysis from vyper.venom.basicblock import ( From c3f9bd8eae66cdda09c86845c281e4fe3cd44b5a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 15:59:03 +0300 Subject: [PATCH 072/172] move tests --- tests/unit/compiler/asm/test_assembler.py | 15 +++++++++++++++ tests/unit/compiler/ir/test_compile_ir.py | 15 --------------- vyper/evm/assembler.py | 5 ----- 3 files changed, 15 insertions(+), 20 deletions(-) create mode 100644 tests/unit/compiler/asm/test_assembler.py diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py new file mode 100644 index 0000000000..ce94717c50 --- /dev/null +++ 
b/tests/unit/compiler/asm/test_assembler.py @@ -0,0 +1,15 @@ +from vyper.evm.assembler import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants + + +def test_const_add(): + asm = [CONST("a", 1), CONST("b", 2), CONST_ADD("c", "a", "b"), CONST_ADD("d", "c", 10)] + const_map = _resolve_constants(asm, {}) + assert const_map[CONSTREF("c")] == 3 + assert const_map[CONSTREF("d")] == 13 + + +def test_const_max(): + asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] + const_map = _resolve_constants(asm, {}) + assert const_map[CONSTREF("c")] == 2 + assert const_map[CONSTREF("d")] == 10 diff --git a/tests/unit/compiler/ir/test_compile_ir.py b/tests/unit/compiler/ir/test_compile_ir.py index d028b34b7a..ba85297afb 100644 --- a/tests/unit/compiler/ir/test_compile_ir.py +++ b/tests/unit/compiler/ir/test_compile_ir.py @@ -1,7 +1,6 @@ import pytest from vyper.codegen.ir_node import IRnode -from vyper.evm.assembler import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants from vyper.evm.opcodes import version_check from vyper.ir import compile_ir from vyper.ir.s_expressions import parse_s_exp @@ -76,17 +75,3 @@ def test_pc_debugger(): offset = 5 assert line_number_map["pc_breakpoints"][0] == offset - - -def test_const_add(): - asm = [CONST("a", 1), CONST("b", 2), CONST_ADD("c", "a", "b"), CONST_ADD("d", "c", 10)] - const_map = _resolve_constants(asm, {}) - assert const_map[CONSTREF("c")] == 3 - assert const_map[CONSTREF("d")] == 13 - - -def test_const_max(): - asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] - const_map = _resolve_constants(asm, {}) - assert const_map[CONSTREF("c")] == 2 - assert const_map[CONSTREF("d")] == 10 diff --git a/vyper/evm/assembler.py b/vyper/evm/assembler.py index fe8eb761b3..8edd32cdb7 100644 --- a/vyper/evm/assembler.py +++ b/vyper/evm/assembler.py @@ -1,8 +1,3 @@ -##################################### -# assembly data structures and utils 
-##################################### - - from dataclasses import dataclass from typing import Any, TypeVar From 0ed619b8aff7daff8deea6c1040646ff8ed8e504 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 16:11:42 +0300 Subject: [PATCH 073/172] merge const_map to symbol_map --- tests/unit/compiler/asm/test_assembler.py | 14 ++++--- vyper/evm/assembler.py | 51 +++++++++-------------- 2 files changed, 27 insertions(+), 38 deletions(-) diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index ce94717c50..20c572a50e 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -3,13 +3,15 @@ def test_const_add(): asm = [CONST("a", 1), CONST("b", 2), CONST_ADD("c", "a", "b"), CONST_ADD("d", "c", 10)] - const_map = _resolve_constants(asm, {}) - assert const_map[CONSTREF("c")] == 3 - assert const_map[CONSTREF("d")] == 13 + symbol_map = {} + _resolve_constants(asm, symbol_map) + assert symbol_map[CONSTREF("c")] == 3 + assert symbol_map[CONSTREF("d")] == 13 def test_const_max(): asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] - const_map = _resolve_constants(asm, {}) - assert const_map[CONSTREF("c")] == 2 - assert const_map[CONSTREF("d")] == 10 + symbol_map = {} + _resolve_constants(asm, symbol_map) + assert symbol_map[CONSTREF("c")] == 2 + assert symbol_map[CONSTREF("d")] == 10 diff --git a/vyper/evm/assembler.py b/vyper/evm/assembler.py index 8edd32cdb7..b408dcaded 100644 --- a/vyper/evm/assembler.py +++ b/vyper/evm/assembler.py @@ -9,6 +9,7 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F +T = TypeVar("T") def num_to_bytearray(x): o = [] @@ -43,8 +44,6 @@ def __repr__(self): return f"DATA {self.label.label}" -# this could be fused with Label, the only difference is if -# it gets looked up from const_map or symbol_map. 
class CONSTREF: def __init__(self, label: str): assert isinstance(label, str) @@ -92,18 +91,18 @@ def __eq__(self, other): return False return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 - def _resolve_operand(self, operand: str | int, const_map: dict[CONSTREF, int]) -> int | None: + def _resolve_operand(self, operand: str | int, symbol_map: dict[T, int]) -> int | None: if isinstance(operand, str): op_ref = CONSTREF(operand) - if op_ref in const_map: - return const_map[op_ref] + if op_ref in symbol_map: + return symbol_map[op_ref] elif isinstance(operand, int): return operand return None - def calculate(self, const_map: dict[CONSTREF, int]) -> int | None: - op1_val = self._resolve_operand(self.op1, const_map) - op2_val = self._resolve_operand(self.op2, const_map) + def calculate(self, symbol_map: dict[CONSTREF, int]) -> int | None: + op1_val = self._resolve_operand(self.op1, symbol_map) + op2_val = self._resolve_operand(self.op2, symbol_map) if op1_val is not None and op2_val is not None: return self._apply_operation(op1_val, op2_val) @@ -238,9 +237,6 @@ def is_ofst(assembly_item): str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader | CONST ) -T = TypeVar("T") - - def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int): if item in symbol_map: # pragma: nocover raise CompilerPanic(f"duplicate label: {item}") @@ -248,31 +244,28 @@ def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int): def _resolve_constants( - assembly: list[AssemblyInstruction], const_map: dict[CONSTREF, int] -) -> dict[CONSTREF, int]: + assembly: list[AssemblyInstruction], symbol_map: dict[T, int] +): for item in assembly: if isinstance(item, CONST): - _add_to_symbol_map(const_map, CONSTREF(item.name), item.value) + _add_to_symbol_map(symbol_map, CONSTREF(item.name), item.value) while True: changed = False for item in assembly: if isinstance(item, (CONST_ADD, CONST_MAX)): # Skip if this constant is 
already resolved - if CONSTREF(item.name) in const_map: + if CONSTREF(item.name) in symbol_map: continue # Calculate the value if possible - if (value := item.calculate(const_map)) is not None: - _add_to_symbol_map(const_map, CONSTREF(item.name), value) + if (value := item.calculate(symbol_map)) is not None: + _add_to_symbol_map(symbol_map, CONSTREF(item.name), value) changed = True if not changed: break - return const_map - - def resolve_symbols( assembly: list[AssemblyInstruction], ) -> tuple[dict[Label, int], dict[CONSTREF, int], dict[str, Any]]: @@ -281,7 +274,6 @@ def resolve_symbols( Returns: symbol_map: dict from labels to values - const_map: dict from CONSTREFs to values source_map: source map dict that gets output for the user """ source_map: dict[str, Any] = { @@ -293,13 +285,10 @@ def resolve_symbols( } symbol_map: dict[Label, int] = {} - const_map: dict[CONSTREF, int] = {} pc: int = 0 - const_map = _resolve_constants(assembly, const_map) - print("\n".join(str(item) for item in assembly)) - print(const_map) + _resolve_constants(assembly, symbol_map) # resolve labels (i.e. JUMPDEST locations) to actual code locations, # and simultaneously build the source map. 
@@ -348,7 +337,7 @@ def resolve_symbols( if isinstance(item.label, Label): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits elif isinstance(item.label, CONSTREF): - const = const_map[item.label] + const = symbol_map[item.label] val = const + item.ofst pc += calc_push_size(val) else: # pragma: nocover @@ -373,7 +362,7 @@ def resolve_symbols( # magic -- probably the assembler should actually add this label _add_to_symbol_map(symbol_map, Label("code_end"), pc) - return symbol_map, const_map, source_map + return symbol_map, source_map # Calculate the size of PUSH instruction @@ -470,15 +459,14 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st source_map: source map dict that gets output for the user """ # This API might seem a bit strange, but it's backwards compatible - symbol_map, const_map, source_map = resolve_symbols(assembly) - bytecode = _assembly_to_evm(assembly, symbol_map, const_map) + symbol_map, source_map = resolve_symbols(assembly) + bytecode = _assembly_to_evm(assembly, symbol_map) return bytecode, source_map def _assembly_to_evm( assembly: list[AssemblyInstruction], symbol_map: dict[Label, int], - const_map: dict[CONSTREF, int], ) -> bytes: """ Assembles assembly into EVM bytecode @@ -486,7 +474,6 @@ def _assembly_to_evm( Parameters: assembly: list of asm instructions symbol_map: dict from labels to resolved locations in the code - const_map: dict from constrefs to their values Returns: bytes representing the bytecode """ @@ -521,7 +508,7 @@ def _assembly_to_evm( bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) else: assert isinstance(item.label, CONSTREF) - ofst = const_map[item.label] + item.ofst + ofst = symbol_map[item.label] + item.ofst bytecode = _compile_push_instruction(PUSH(ofst)) ret.extend(bytecode) From 02038b8891f8c8dc7468504eaeb3f84ac2f1abe1 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 16:22:54 +0300 Subject: [PATCH 074/172] reorg files --- 
tests/unit/compiler/asm/test_asm_optimizer.py | 4 ++-- tests/unit/compiler/asm/test_assembler.py | 2 +- vyper/evm/assembler/__init__.py | 4 ++++ vyper/evm/{assembler.py => assembler/core.py} | 12 +++++------- vyper/evm/{ => assembler}/optimizer.py | 4 ++-- vyper/evm/constants.py | 2 ++ vyper/ir/compile_ir.py | 4 ++-- vyper/venom/__init__.py | 2 +- vyper/venom/venom_to_assembly.py | 4 ++-- 9 files changed, 21 insertions(+), 17 deletions(-) create mode 100644 vyper/evm/assembler/__init__.py rename vyper/evm/{assembler.py => assembler/core.py} (98%) rename vyper/evm/{ => assembler}/optimizer.py (98%) create mode 100644 vyper/evm/constants.py diff --git a/tests/unit/compiler/asm/test_asm_optimizer.py b/tests/unit/compiler/asm/test_asm_optimizer.py index 8b21b02583..230d1b30c4 100644 --- a/tests/unit/compiler/asm/test_asm_optimizer.py +++ b/tests/unit/compiler/asm/test_asm_optimizer.py @@ -3,8 +3,8 @@ from vyper.compiler import compile_code from vyper.compiler.phases import CompilerData from vyper.compiler.settings import OptimizationLevel, Settings -from vyper.evm.assembler import PUSHLABEL, Label -from vyper.evm.optimizer import _merge_jumpdests +from vyper.evm.assembler.core import PUSHLABEL, Label +from vyper.evm.assembler.optimizer import _merge_jumpdests codes = [ """ diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index 20c572a50e..dbf405f53b 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -1,4 +1,4 @@ -from vyper.evm.assembler import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants +from vyper.evm.assembler.core import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants def test_const_add(): diff --git a/vyper/evm/assembler/__init__.py b/vyper/evm/assembler/__init__.py new file mode 100644 index 0000000000..c6955aa192 --- /dev/null +++ b/vyper/evm/assembler/__init__.py @@ -0,0 +1,4 @@ +from vyper.evm.assembler.core import assembly_to_evm, 
resolve_symbols +from vyper.evm.assembler.optimizer import optimize_assembly + +__all__ = ["assembly_to_evm", "resolve_symbols", "optimize_assembly"] diff --git a/vyper/evm/assembler.py b/vyper/evm/assembler/core.py similarity index 98% rename from vyper/evm/assembler.py rename to vyper/evm/assembler/core.py index b408dcaded..0eeb706e69 100644 --- a/vyper/evm/assembler.py +++ b/vyper/evm/assembler/core.py @@ -11,6 +11,7 @@ T = TypeVar("T") + def num_to_bytearray(x): o = [] while x > 0: @@ -237,15 +238,14 @@ def is_ofst(assembly_item): str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader | CONST ) + def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int): if item in symbol_map: # pragma: nocover raise CompilerPanic(f"duplicate label: {item}") symbol_map[item] = value -def _resolve_constants( - assembly: list[AssemblyInstruction], symbol_map: dict[T, int] -): +def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[T, int]): for item in assembly: if isinstance(item, CONST): _add_to_symbol_map(symbol_map, CONSTREF(item.name), item.value) @@ -266,6 +266,7 @@ def _resolve_constants( if not changed: break + def resolve_symbols( assembly: list[AssemblyInstruction], ) -> tuple[dict[Label, int], dict[CONSTREF, int], dict[str, Any]]: @@ -464,10 +465,7 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st return bytecode, source_map -def _assembly_to_evm( - assembly: list[AssemblyInstruction], - symbol_map: dict[Label, int], -) -> bytes: +def _assembly_to_evm(assembly: list[AssemblyInstruction], symbol_map: dict[Label, int]) -> bytes: """ Assembles assembly into EVM bytecode diff --git a/vyper/evm/optimizer.py b/vyper/evm/assembler/optimizer.py similarity index 98% rename from vyper/evm/optimizer.py rename to vyper/evm/assembler/optimizer.py index 4ff8520468..e8cd02e161 100644 --- a/vyper/evm/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -1,6 +1,6 @@ -from 
vyper.evm.assembler import DATA_ITEM, PUSHLABEL, DataHeader, Label, is_symbol +from vyper.evm.assembler.core import DATA_ITEM, PUSHLABEL, DataHeader, Label, is_symbol +from vyper.evm.constants import COMMUTATIVE_OPS from vyper.exceptions import CompilerPanic -from vyper.ir.optimizer import COMMUTATIVE_OPS _TERMINAL_OPS = ("JUMP", "RETURN", "REVERT", "STOP", "INVALID") diff --git a/vyper/evm/constants.py b/vyper/evm/constants.py new file mode 100644 index 0000000000..daafcb97ab --- /dev/null +++ b/vyper/evm/constants.py @@ -0,0 +1,2 @@ +# Commutative operations in EVM +COMMUTATIVE_OPS = {"ADD", "MUL", "EQ", "AND", "OR", "XOR"} diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index e1034c1fa3..41e3e674a6 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -8,7 +8,7 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel -from vyper.evm.assembler import ( +from vyper.evm.assembler.core import ( CONST, CONSTREF, DATA_ITEM, @@ -25,8 +25,8 @@ get_data_segment_lengths, mkdebug, ) +from vyper.evm.assembler.optimizer import optimize_assembly from vyper.evm.opcodes import get_opcodes -from vyper.evm.optimizer import optimize_assembly from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions from vyper.version import version_tuple diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 0582719146..3598e53062 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -6,7 +6,7 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel, Settings from vyper.evm.address_space import MEMORY, STORAGE, TRANSIENT -from vyper.evm.assembler import AssemblyInstruction +from vyper.evm.assembler.core import AssemblyInstruction from vyper.exceptions import CompilerPanic from vyper.venom.analysis import MemSSA from vyper.venom.analysis.analysis import IRAnalysesCache diff --git a/vyper/venom/venom_to_assembly.py 
b/vyper/venom/venom_to_assembly.py index c280373d8f..12ac05fe22 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -2,7 +2,7 @@ from typing import Any, Iterable -from vyper.evm.assembler import ( +from vyper.evm.assembler.core import ( DATA_ITEM, PUSH, PUSH_OFST, @@ -12,7 +12,7 @@ Label, TaggedInstruction, ) -from vyper.evm.optimizer import optimize_assembly +from vyper.evm.assembler.optimizer import optimize_assembly from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.utils import MemoryPositions, OrderedSet, wrap256 from vyper.venom.analysis import CFGAnalysis, DFGAnalysis, IRAnalysesCache, LivenessAnalysis From 25003995860fa3db377a6119fc434c79d54df19b Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 16:27:55 +0300 Subject: [PATCH 075/172] typing gymnastics --- vyper/evm/assembler/core.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 0eeb706e69..b96256e581 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, TypeVar +from typing import Any from vyper.evm.opcodes import get_opcodes, version_check from vyper.exceptions import CompilerPanic @@ -9,8 +9,6 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F -T = TypeVar("T") - def num_to_bytearray(x): o = [] @@ -62,6 +60,9 @@ def __hash__(self): return hash(self.label) +SymbolKey = Label | CONSTREF + + class CONST: def __init__(self, name: str, value: int): assert isinstance(name, str) @@ -92,7 +93,7 @@ def __eq__(self, other): return False return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 - def _resolve_operand(self, operand: str | int, symbol_map: dict[T, int]) -> int | None: + def _resolve_operand(self, operand: str | int, symbol_map: dict[SymbolKey, int]) -> int | None: if isinstance(operand, str): op_ref = 
CONSTREF(operand) if op_ref in symbol_map: @@ -101,7 +102,7 @@ def _resolve_operand(self, operand: str | int, symbol_map: dict[T, int]) -> int return operand return None - def calculate(self, symbol_map: dict[CONSTREF, int]) -> int | None: + def calculate(self, symbol_map: dict[SymbolKey, int]) -> int | None: op1_val = self._resolve_operand(self.op1, symbol_map) op2_val = self._resolve_operand(self.op2, symbol_map) @@ -239,13 +240,13 @@ def is_ofst(assembly_item): ) -def _add_to_symbol_map(symbol_map: dict[T, int], item: T, value: int): +def _add_to_symbol_map(symbol_map: dict[SymbolKey, int], item: SymbolKey, value: int): if item in symbol_map: # pragma: nocover raise CompilerPanic(f"duplicate label: {item}") symbol_map[item] = value -def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[T, int]): +def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int]): for item in assembly: if isinstance(item, CONST): _add_to_symbol_map(symbol_map, CONSTREF(item.name), item.value) @@ -269,7 +270,7 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[T, def resolve_symbols( assembly: list[AssemblyInstruction], -) -> tuple[dict[Label, int], dict[CONSTREF, int], dict[str, Any]]: +) -> tuple[dict[SymbolKey, int], dict[str, Any]]: """ Construct symbol map from assembly list @@ -285,7 +286,7 @@ def resolve_symbols( "error_map": {}, } - symbol_map: dict[Label, int] = {} + symbol_map: dict[SymbolKey, int] = {} pc: int = 0 @@ -428,7 +429,7 @@ def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: return ret -def _compile_data_item(item: DATA_ITEM, symbol_map: dict[Label, int]) -> bytes: +def _compile_data_item(item: DATA_ITEM, symbol_map: dict[SymbolKey, int]) -> bytes: if isinstance(item.data, bytes): return item.data if isinstance(item.data, Label): @@ -465,7 +466,9 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st return bytecode, 
source_map -def _assembly_to_evm(assembly: list[AssemblyInstruction], symbol_map: dict[Label, int]) -> bytes: +def _assembly_to_evm( + assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int] +) -> bytes: """ Assembles assembly into EVM bytecode From fc9e49b3bb56e1558f1ef52405130d1d28040291 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 16:56:45 +0300 Subject: [PATCH 076/172] add symbols.py --- tests/unit/compiler/asm/test_assembler.py | 3 +- vyper/evm/assembler/core.py | 105 +--------------------- vyper/evm/assembler/symbols.py | 101 +++++++++++++++++++++ 3 files changed, 104 insertions(+), 105 deletions(-) create mode 100644 vyper/evm/assembler/symbols.py diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index dbf405f53b..ca21f6c2e1 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -1,4 +1,5 @@ -from vyper.evm.assembler.core import CONST, CONST_ADD, CONST_MAX, CONSTREF, _resolve_constants +from vyper.evm.assembler.core import _resolve_constants +from vyper.evm.assembler.symbols import CONST, CONSTREF, CONST_ADD, CONST_MAX def test_const_add(): diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index b96256e581..17f92a9f26 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from typing import Any +from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, Label, CONSTREF, SymbolKey from vyper.evm.opcodes import get_opcodes, version_check from vyper.exceptions import CompilerPanic from vyper.utils import OrderedSet @@ -18,22 +19,6 @@ def num_to_bytearray(x): return o -class Label: - def __init__(self, label: str): - assert isinstance(label, str) - self.label = label - - def __repr__(self): - return f"LABEL {self.label}" - - def __eq__(self, other): - if not isinstance(other, Label): - return False - return self.label == 
other.label - - def __hash__(self): - return hash(self.label) - @dataclass class DataHeader: @@ -42,94 +27,6 @@ class DataHeader: def __repr__(self): return f"DATA {self.label.label}" - -class CONSTREF: - def __init__(self, label: str): - assert isinstance(label, str) - self.label = label - - def __repr__(self): - return f"CONSTREF {self.label}" - - def __eq__(self, other): - if not isinstance(other, CONSTREF): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - - -SymbolKey = Label | CONSTREF - - -class CONST: - def __init__(self, name: str, value: int): - assert isinstance(name, str) - assert isinstance(value, int) - self.name = name - self.value = value - - def __repr__(self): - return f"CONST {self.name} {self.value}" - - def __eq__(self, other): - if not isinstance(other, CONST): - return False - return self.name == other.name and self.value == other.value - - -class BaseConstOp: - def __init__(self, name: str, op1: str | int, op2: str | int): - assert isinstance(name, str) - assert isinstance(op1, (str, int)) - assert isinstance(op2, (str, int)) - self.name = name - self.op1 = op1 - self.op2 = op2 - - def __eq__(self, other): - if not isinstance(other, type(self)): - return False - return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 - - def _resolve_operand(self, operand: str | int, symbol_map: dict[SymbolKey, int]) -> int | None: - if isinstance(operand, str): - op_ref = CONSTREF(operand) - if op_ref in symbol_map: - return symbol_map[op_ref] - elif isinstance(operand, int): - return operand - return None - - def calculate(self, symbol_map: dict[SymbolKey, int]) -> int | None: - op1_val = self._resolve_operand(self.op1, symbol_map) - op2_val = self._resolve_operand(self.op2, symbol_map) - - if op1_val is not None and op2_val is not None: - return self._apply_operation(op1_val, op2_val) - return None - - def _apply_operation(self, op1_val: int, op2_val: int) -> int: - raise 
NotImplementedError("Subclasses must implement _apply_operation") - - -class CONST_ADD(BaseConstOp): - def __repr__(self): - return f"CONST_ADD {self.name} {self.op1} {self.op2}" - - def _apply_operation(self, op1_val: int, op2_val: int) -> int: - return op1_val + op2_val - - -class CONST_MAX(BaseConstOp): - def __repr__(self): - return f"CONST_MAX {self.name} {self.op1} {self.op2}" - - def _apply_operation(self, op1_val: int, op2_val: int) -> int: - return max(op1_val, op2_val) - - class PUSHLABEL: def __init__(self, label: Label): assert isinstance(label, Label), label diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py new file mode 100644 index 0000000000..b4e93efd19 --- /dev/null +++ b/vyper/evm/assembler/symbols.py @@ -0,0 +1,101 @@ + +class Label: + def __init__(self, label: str): + assert isinstance(label, str) + self.label = label + + def __repr__(self): + return f"LABEL {self.label}" + + def __eq__(self, other): + if not isinstance(other, Label): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + + +class CONSTREF: + def __init__(self, label: str): + assert isinstance(label, str) + self.label = label + + def __repr__(self): + return f"CONSTREF {self.label}" + + def __eq__(self, other): + if not isinstance(other, CONSTREF): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + +SymbolKey = Label | CONSTREF + +class CONST: + def __init__(self, name: str, value: int): + assert isinstance(name, str) + assert isinstance(value, int) + self.name = name + self.value = value + + def __repr__(self): + return f"CONST {self.name} {self.value}" + + def __eq__(self, other): + if not isinstance(other, CONST): + return False + return self.name == other.name and self.value == other.value + + +class BaseConstOp: + def __init__(self, name: str, op1: str | int, op2: str | int): + assert isinstance(name, str) + assert isinstance(op1, (str, int)) + assert 
isinstance(op2, (str, int)) + self.name = name + self.op1 = op1 + self.op2 = op2 + + def __eq__(self, other): + if not isinstance(other, type(self)): + return False + return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 + + def _resolve_operand(self, operand: str | int, symbol_map: dict[SymbolKey, int]) -> int | None: + if isinstance(operand, str): + op_ref = CONSTREF(operand) + if op_ref in symbol_map: + return symbol_map[op_ref] + elif isinstance(operand, int): + return operand + return None + + def calculate(self, symbol_map: dict[SymbolKey, int]) -> int | None: + op1_val = self._resolve_operand(self.op1, symbol_map) + op2_val = self._resolve_operand(self.op2, symbol_map) + + if op1_val is not None and op2_val is not None: + return self._apply_operation(op1_val, op2_val) + return None + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + raise NotImplementedError("Subclasses must implement _apply_operation") + + +class CONST_ADD(BaseConstOp): + def __repr__(self): + return f"CONST_ADD {self.name} {self.op1} {self.op2}" + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + return op1_val + op2_val + + +class CONST_MAX(BaseConstOp): + def __repr__(self): + return f"CONST_MAX {self.name} {self.op1} {self.op2}" + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + return max(op1_val, op2_val) From 8359928c30bf25222f05d2d997d583aff4a2cd4f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 16:57:09 +0300 Subject: [PATCH 077/172] lint --- tests/unit/compiler/asm/test_assembler.py | 2 +- vyper/evm/assembler/core.py | 4 ++-- vyper/evm/assembler/symbols.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index ca21f6c2e1..f234265ec9 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -1,5 +1,5 @@ from 
vyper.evm.assembler.core import _resolve_constants -from vyper.evm.assembler.symbols import CONST, CONSTREF, CONST_ADD, CONST_MAX +from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONSTREF def test_const_add(): diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 17f92a9f26..6052d20af9 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Any -from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, Label, CONSTREF, SymbolKey +from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONSTREF, Label, SymbolKey from vyper.evm.opcodes import get_opcodes, version_check from vyper.exceptions import CompilerPanic from vyper.utils import OrderedSet @@ -19,7 +19,6 @@ def num_to_bytearray(x): return o - @dataclass class DataHeader: label: Label @@ -27,6 +26,7 @@ class DataHeader: def __repr__(self): return f"DATA {self.label.label}" + class PUSHLABEL: def __init__(self, label: Label): assert isinstance(label, Label), label diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py index b4e93efd19..575dc34d7a 100644 --- a/vyper/evm/assembler/symbols.py +++ b/vyper/evm/assembler/symbols.py @@ -1,4 +1,3 @@ - class Label: def __init__(self, label: str): assert isinstance(label, str) @@ -32,8 +31,10 @@ def __eq__(self, other): def __hash__(self): return hash(self.label) + SymbolKey = Label | CONSTREF + class CONST: def __init__(self, name: str, value: int): assert isinstance(name, str) From ecba4af3f0aaee972f88191b5cf9cc1abde867b7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jun 2025 17:00:42 +0300 Subject: [PATCH 078/172] CONST_SUB and test --- tests/unit/compiler/asm/test_assembler.py | 10 +++++++++- vyper/evm/assembler/core.py | 4 ++-- vyper/evm/assembler/symbols.py | 8 ++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tests/unit/compiler/asm/test_assembler.py 
b/tests/unit/compiler/asm/test_assembler.py index f234265ec9..e3fbead1cc 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -1,5 +1,5 @@ from vyper.evm.assembler.core import _resolve_constants -from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONSTREF +from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF def test_const_add(): @@ -10,6 +10,14 @@ def test_const_add(): assert symbol_map[CONSTREF("d")] == 13 +def test_const_sub(): + asm = [CONST("a", 1), CONST("b", 2), CONST_SUB("c", "a", "b"), CONST_ADD("d", "c", 10)] + symbol_map = {} + _resolve_constants(asm, symbol_map) + assert symbol_map[CONSTREF("c")] == -1 + assert symbol_map[CONSTREF("d")] == 9 + + def test_const_max(): asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] symbol_map = {} diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 6052d20af9..507e044f83 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Any -from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONSTREF, Label, SymbolKey +from vyper.evm.assembler.symbols import CONST, CONSTREF, BaseConstOp, Label, SymbolKey from vyper.evm.opcodes import get_opcodes, version_check from vyper.exceptions import CompilerPanic from vyper.utils import OrderedSet @@ -151,7 +151,7 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[Sym while True: changed = False for item in assembly: - if isinstance(item, (CONST_ADD, CONST_MAX)): + if isinstance(item, BaseConstOp): # Skip if this constant is already resolved if CONSTREF(item.name) in symbol_map: continue diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py index 575dc34d7a..26f0e1226c 100644 --- a/vyper/evm/assembler/symbols.py +++ b/vyper/evm/assembler/symbols.py @@ -94,6 +94,14 @@ 
def _apply_operation(self, op1_val: int, op2_val: int) -> int: return op1_val + op2_val +class CONST_SUB(BaseConstOp): + def __repr__(self): + return f"CONST_SUB {self.name} {self.op1} {self.op2}" + + def _apply_operation(self, op1_val: int, op2_val: int) -> int: + return op1_val - op2_val + + class CONST_MAX(BaseConstOp): def __repr__(self): return f"CONST_MAX {self.name} {self.op1} {self.op2}" From 19151878fdd89d17fa12dc0ea17d5b5de2dbc98f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 17 Jun 2025 10:39:58 +0300 Subject: [PATCH 079/172] label upgrades --- tests/functional/venom/parser/test_parsing.py | 45 ++++++++++++ vyper/venom/basicblock.py | 4 +- vyper/venom/context.py | 6 ++ vyper/venom/parser.py | 73 ++++++++++++++++--- 4 files changed, 115 insertions(+), 13 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index 728e67bdf8..39644c4333 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -366,3 +366,48 @@ def test_phis(): parsed_fn = next(iter(ctx.functions.values())) assert_bb_eq(parsed_fn.get_basic_block(expect_bb.label.name), expect_bb) + + +def test_global_vars(): + source = """ + + global_var: 10 + + function main { + main: + %1 = 1 + %2 = 2 + %3 = add %1, %2 + } + """ + ctx = parse_venom(source) + + # assert_ctx_eq(ctx, expected_ctx) + + +def test_labels_with_addresses(): + source = """ + my_global: 0x1000 + + function main { + main: @0x20 + %1 = 1 + jmp @other_block + other_block: + %2 = 2 + %3 = add %1, %2 + } + """ + ctx = parse_venom(source) + + assert "my_global" in ctx.global_labels + assert ctx.global_labels["my_global"] == 0x1000 + + main_fn = ctx.get_function(IRLabel("main")) + assert main_fn is not None + + main_bb = main_fn.get_basic_block("main") + assert main_bb.label.address == 0x20 + + other_bb = main_fn.get_basic_block("other_block") + assert other_bb.label.address is None \ No newline at end of 
file diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 4b7dae1918..c7a918f725 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -201,11 +201,13 @@ class IRLabel(IROperand): # (like a function name, try to preserve it in optimization passes) is_symbol: bool = False value: str + address: Optional[int] = None # optional address override - def __init__(self, value: str, is_symbol: bool = False) -> None: + def __init__(self, value: str, is_symbol: bool = False, address: Optional[int] = None) -> None: assert isinstance(value, str), f"not a str: {value} ({type(value)})" assert len(value) > 0 self.is_symbol = is_symbol + self.address = address super().__init__(value) _IS_IDENTIFIER = re.compile("[0-9a-zA-Z_]*") diff --git a/vyper/venom/context.py b/vyper/venom/context.py index f50dc1220f..7b6a441a06 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -34,6 +34,7 @@ class IRContext: functions: dict[IRLabel, IRFunction] entry_function: Optional[IRFunction] constants: dict[str, int] # globally defined constants + global_labels: dict[str, int] # globally defined labels with addresses data_segment: list[DataSection] last_label: int last_variable: int @@ -43,6 +44,7 @@ def __init__(self) -> None: self.entry_function = None self.data_segment = [] self.constants = {} + self.global_labels = {} self.last_label = 0 self.last_variable = 0 @@ -102,6 +104,10 @@ def add_constant(self, name: str, value: int) -> None: assert name not in self.constants self.constants[name] = value + def add_global_label(self, name: str, address: int) -> None: + assert name not in self.global_labels + self.global_labels[name] = address + def as_graph(self) -> str: s = ["digraph G {"] for fn in self.functions.values(): diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 81934482fc..55acc473de 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -26,7 +26,10 @@ # Allow multiple comment styles COMMENT: ";" /[^\\n]*/ | "//" 
/[^\\n]*/ | "#" /[^\\n]*/ - start: function* data_segment? + start: (global_label | function)* data_segment? + + # Global label definitions with optional address override + global_label: LABEL_IDENT ":" CONST # TODO: consider making entry block implicit, e.g. # `"{" instruction+ block* "}"` @@ -36,7 +39,7 @@ data_section: "dbsection" LABEL_IDENT ":" data_item+ data_item: "db" (HEXSTR | LABEL) - block: LABEL_IDENT ":" "\\n" statement* + block: LABEL_IDENT ":" ("@" CONST)? "\\n" statement* statement: (instruction | assignment) "\\n" assignment: VAR_IDENT "=" expr @@ -106,21 +109,53 @@ class _DataSegment(_TypedItem): pass +class _GlobalLabel(_TypedItem): + pass + + class VenomTransformer(Transformer): def start(self, children) -> IRContext: ctx = IRContext() - if len(children) > 0 and isinstance(children[-1], _DataSegment): - ctx.data_segment = children.pop().children - - funcs = children + + # Separate global labels, functions, and data segments + global_labels = [] + funcs = [] + data_segment = None + + for child in children: + if isinstance(child, _GlobalLabel): + global_labels.append(child) + elif isinstance(child, _DataSegment): + data_segment = child + else: + funcs.append(child) + + # Process global labels + for global_label in global_labels: + name, address = global_label.children + ctx.add_global_label(name, address) + + # Process data segment + if data_segment: + ctx.data_segment = data_segment.children + + # Process functions for fn_name, blocks in funcs: fn = ctx.create_function(fn_name) if ctx.entry_function is None: ctx.entry_function = fn fn._basic_block_dict.clear() - for block_name, instructions in blocks: - bb = IRBasicBlock(IRLabel(block_name, True), fn) + for block_data in blocks: + if len(block_data) == 2: + # No address override: (block_name, instructions) + block_name, instructions = block_data + bb = IRBasicBlock(IRLabel(block_name, True), fn) + else: + # With address override: (block_name, address, instructions) + block_name, address, 
instructions = block_data + bb = IRBasicBlock(IRLabel(block_name, True, address), fn) + fn.append_basic_block(bb) for instruction in instructions: @@ -132,7 +167,11 @@ def start(self, children) -> IRContext: return ctx - def function(self, children) -> tuple[str, list[tuple[str, list[IRInstruction]]]]: + def global_label(self, children) -> _GlobalLabel: + name, address_literal = children + return _GlobalLabel([name, address_literal.value]) + + def function(self, children) -> tuple[str, list]: name, *blocks = children return name, blocks @@ -158,9 +197,19 @@ def data_item(self, children): item = item.replace("_", "") return DataItem(bytes.fromhex(item)) - def block(self, children) -> tuple[str, list[IRInstruction]]: - label, *instructions = children - return label, instructions + def block(self, children) -> tuple: + label = children[0] + + # Find where the instructions start + if len(children) >= 3 and isinstance(children[1], IRLiteral): + # With address override: label, address_literal, *instructions + address_literal = children[1] + instructions = children[2:] + return (label, address_literal.value, instructions) + else: + # No address override: label, *instructions + instructions = children[1:] + return (label, instructions) def assignment(self, children) -> IRInstruction: to, value = children From c0af8128f702c3c0619eeeac6805e5b2e2a19dc7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 17 Jun 2025 10:50:43 +0300 Subject: [PATCH 080/172] test --- tests/functional/venom/parser/test_parsing.py | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index 39644c4333..9b2a99e811 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -410,4 +410,34 @@ def test_labels_with_addresses(): assert main_bb.label.address == 0x20 other_bb = main_fn.get_basic_block("other_block") - assert 
other_bb.label.address is None \ No newline at end of file + assert other_bb.label.address is None + +def test_labels_with_addresses_used_in_function(): + source = """ + my_global: 0x1000 + + function main { + main: @0x20 + %1 = 1 + jmp @other_block + other_block: + %3 = add %1, @my_global + } + """ + ctx = parse_venom(source) + + assert "my_global" in ctx.global_labels + assert ctx.global_labels["my_global"] == 0x1000 + + main_fn = ctx.get_function(IRLabel("main")) + assert main_fn is not None + + main_bb = main_fn.get_basic_block("main") + assert main_bb.label.address == 0x20 + + other_bb = main_fn.get_basic_block("other_block") + assert other_bb.label.address is None + + add_inst = other_bb.instructions[0] + assert add_inst.opcode == "add" + assert add_inst.operands[0].value == "my_global" \ No newline at end of file From 201e72b98378afbddeae1bf3e0a1fa8362c31514 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 18 Jun 2025 14:53:32 +0300 Subject: [PATCH 081/172] wip --- .../compiler/venom/test_venom_to_assembly.py | 20 +++++++++++++++++++ vyper/venom/venom_to_assembly.py | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/tests/unit/compiler/venom/test_venom_to_assembly.py b/tests/unit/compiler/venom/test_venom_to_assembly.py index 73162485b2..77710e98ec 100644 --- a/tests/unit/compiler/venom/test_venom_to_assembly.py +++ b/tests/unit/compiler/venom/test_venom_to_assembly.py @@ -1,3 +1,4 @@ +from vyper.compiler.phases import generate_bytecode from vyper.venom.parser import parse_venom from vyper.venom.venom_to_assembly import VenomCompiler @@ -34,3 +35,22 @@ def test_optimistic_swap_params(): asm = VenomCompiler(ctx).generate_evm_assembly() assert asm == ["SWAP2", "PUSH1", 117, "POP", "MSTORE", "MSTORE", "JUMP"] + +def test_global_vars(): + code = """ + global_var: 10 + + function foo { + main: + %1 = 1 + %2 = 2 + %3 = add %1, @global_var + ret %3 + } + """ + ctx = parse_venom(code) + asm = VenomCompiler(ctx).generate_evm_assembly() + bytecode, _ = 
generate_bytecode(asm) + print(f"0x{bytecode.hex()}") + print(asm) + assert asm == ["SWAP2", "PUSH1", 117, "POP", "MSTORE", "MSTORE", "JUMP"] \ No newline at end of file diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 12ac05fe22..f0d94ca673 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -167,6 +167,10 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr asm: list[AssemblyInstruction] = [] + # Add global variables to the assembly + for var_name, var_value in self.ctx.global_labels.items(): + asm.append(Label(var_name)) + for fn in self.ctx.functions.values(): ac = IRAnalysesCache(fn) From 160634faa04cbf9fee947a58f338fc632737074e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 23 Jun 2025 13:27:05 +0300 Subject: [PATCH 082/172] remove `dbsection` --- tests/functional/venom/parser/test_parsing.py | 4 ++-- vyper/venom/context.py | 2 +- vyper/venom/parser.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index 9b2a99e811..3e73480e5e 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -88,7 +88,7 @@ def test_data_section(): } data readonly { - dbsection selector_buckets: + selector_buckets: db @selector_bucket_0 db @fallback db @selector_bucket_2 @@ -199,7 +199,7 @@ def test_multi_function_and_data(): } data readonly { - dbsection selector_buckets: + selector_buckets: db @selector_bucket_0 db @fallback db @selector_bucket_2 diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 7b6a441a06..969aefe29b 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -24,7 +24,7 @@ class DataSection: data_items: list[DataItem] = field(default_factory=list) def __str__(self): - ret = [f"dbsection {self.label.value}:"] + ret = [f"{self.label.value}:"] for item in 
self.data_items: ret.append(f" db {item}") return "\n".join(ret) diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 74e7365015..beb84eba4d 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -56,7 +56,7 @@ label_ref: "@" (IDENT | ESCAPED_STRING) data_segment: "data" "readonly" "{" data_section* "}" - data_section: "dbsection" label_name ":" NEWLINE+ data_item+ + data_section: label_name ":" NEWLINE+ data_item+ data_item: "db" (HEXSTR | label_ref) NEWLINE+ DOUBLE_QUOTE: "\\"" From e6fc4e5ccf3be1ab7485a0f6a4125db94a85e139 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 23 Jun 2025 15:45:04 +0300 Subject: [PATCH 083/172] `db` instruction and hex operand --- vyper/venom/basicblock.py | 17 +++++++++++++++++ vyper/venom/venom_to_assembly.py | 12 ++++++++++++ 2 files changed, 29 insertions(+) diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 032376faaf..09fb942e30 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -77,6 +77,7 @@ "jnz", "log", "nop", + "db", ] ) @@ -192,6 +193,22 @@ def plain_name(self) -> str: return self.name.strip("%") +class IRHexString(IROperand): + """ + IRHexString represents a hex string literal in IR, + currently only used for db instructions + """ + + value: bytes + + def __init__(self, value: bytes) -> None: + assert isinstance(value, bytes), value + super().__init__(value) + + def __repr__(self) -> str: + return f'x"{self.value.hex()}"' + + class IRLabel(IROperand): """ IRLabel represents a label in IR. A label is a string that starts with a %. 
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index f0d94ca673..3bad2da3f0 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -25,6 +25,7 @@ IRLiteral, IROperand, IRVariable, + IRHexString, ) from vyper.venom.context import IRContext, IRFunction from vyper.venom.passes import NormalizationPass @@ -427,6 +428,8 @@ def _generate_evm_for_instruction( log_topic_count = inst.operands[0].value assert log_topic_count in [0, 1, 2, 3, 4], "Invalid topic count" operands = inst.operands[1:] + elif opcode == "db": + operands = [] else: operands = inst.operands @@ -513,6 +516,15 @@ def _generate_evm_for_instruction( pass elif opcode == "dbname": pass + elif opcode == "db": + # Handle inline db instruction - emit data directly to assembly + data_operand = inst.operands[0] + if isinstance(data_operand, IRLabel): + assembly.append(DATA_ITEM(_as_asm_symbol(data_operand))) + elif isinstance(data_operand, IRHexString): + assembly.append(DATA_ITEM(data_operand.value)) + else: + raise Exception(f"Unsupported db operand type: {type(data_operand)}") elif opcode == "jnz": # jump if not zero if_nonzero_label, if_zero_label = inst.get_label_operands() From 2a7a7508d8bf8f139516bfa2b10e803e7c8f6d1d Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 23 Jun 2025 15:45:24 +0300 Subject: [PATCH 084/172] update the venom parser --- vyper/venom/parser.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index beb84eba4d..c5d4530494 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -10,6 +10,7 @@ IRLiteral, IROperand, IRVariable, + IRHexString, ) from vyper.venom.context import DataItem, DataSection, IRContext from vyper.venom.function import IRFunction @@ -43,10 +44,11 @@ expr: instruction | operand instruction: IDENT operands_list? 
+ | DB operands_list operands_list: operand ("," operand)* - operand: VAR_IDENT | CONST | label_ref + operand: VAR_IDENT | CONST | label_ref | HEXSTR VAR_IDENT: "%" (DIGIT|LETTER|"_"|":")+ @@ -57,10 +59,11 @@ data_segment: "data" "readonly" "{" data_section* "}" data_section: label_name ":" NEWLINE+ data_item+ - data_item: "db" (HEXSTR | label_ref) NEWLINE+ + data_item: DB (HEXSTR | label_ref) NEWLINE+ DOUBLE_QUOTE: "\\"" IDENT: (DIGIT|LETTER|"_")+ + DB: "db" HEXSTR: "x" DOUBLE_QUOTE (HEXDIGIT|"_")+ DOUBLE_QUOTE CONST: SIGNED_INT | "0x" HEXDIGIT+ @@ -233,8 +236,9 @@ def data_section(self, children) -> DataSection: return DataSection(label, data_items) def data_item(self, children) -> DataItem: - # children[0] is the data content, rest are NEWLINE tokens - item = children[0] + # children[0] is the DB "IDENT", children[1] is the data content, rest are NEWLINE tokens + assert children[0] == "db", f"Expected 'db', got {children[0]}" + item = children[1] if isinstance(item, IRLabel): return DataItem(item) @@ -264,12 +268,19 @@ def instruction(self, children) -> IRInstruction: if len(children) == 1: # just the opcode (IDENT) opcode = str(children[0]) + # Handle Lark tokens + if hasattr(children[0], 'value'): + opcode = children[0].value operands = [] - else: - assert len(children) == 2 - # IDENT and operands_list + elif len(children) == 2: + # Two cases: IDENT + operands_list OR "db" + operands_list opcode = str(children[0]) + # Handle Lark tokens + if hasattr(children[0], 'value'): + opcode = children[0].value operands = children[1] + else: + raise ValueError(f"Unexpected instruction children: {children}") # reverse operands, venom internally represents top of stack # as rightmost operand @@ -278,7 +289,7 @@ def instruction(self, children) -> IRInstruction: # invoke operands = [operands[0]] + list(reversed(operands[1:])) # special cases: operands with labels look better un-reversed - elif opcode not in ("jmp", "jnz", "djmp", "phi"): + elif opcode not in ("jmp", "jnz", 
"djmp", "phi", "db"): operands.reverse() return IRInstruction(opcode, operands) @@ -286,7 +297,14 @@ def operands_list(self, children) -> list[IROperand]: return children def operand(self, children) -> IROperand: - return children[0] + operand = children[0] + if isinstance(operand, str) and operand.startswith('x"'): + # Handle hex strings - convert to IRHexString + assert operand.endswith('"') + hex_content = operand.removeprefix('x"').removesuffix('"') + hex_content = hex_content.replace("_", "") + return IRHexString(bytes.fromhex(hex_content)) + return operand def func_name(self, children) -> str: # func_name can be IDENT or ESCAPED_STRING @@ -314,6 +332,9 @@ def CONST(self, val) -> IRLiteral: def IDENT(self, val) -> str: return val.value + def DB(self, val) -> str: + return val.value + def HEXSTR(self, val) -> str: return val.value From f6379872217277eafc0acd80b1b7dbf78f98327c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 23 Jun 2025 16:59:58 +0300 Subject: [PATCH 085/172] wip --- tests/venom_utils.py | 1 - vyper/venom/__init__.py | 24 ++++++++++++++++++++++-- vyper/venom/ir_node_to_venom.py | 23 ++++++++++++++++++++--- vyper/venom/parser.py | 23 ++++++++++++++++------- vyper/venom/venom_to_assembly.py | 22 +++++----------------- 5 files changed, 63 insertions(+), 30 deletions(-) diff --git a/tests/venom_utils.py b/tests/venom_utils.py index 5c3dd83d60..7ebef8d8af 100644 --- a/tests/venom_utils.py +++ b/tests/venom_utils.py @@ -48,7 +48,6 @@ def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext): # check entry function is the same assert next(iter(ctx1.functions.keys())) == next(iter(ctx2.functions.keys())) - assert ctx1.data_segment == ctx2.data_segment, ctx2.data_segment class PrePostChecker: diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index c396cd65a3..47dd23dc87 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -10,8 +10,8 @@ from vyper.exceptions import CompilerPanic from vyper.venom.analysis import MemSSA 
from vyper.venom.analysis.analysis import IRAnalysesCache -from vyper.venom.basicblock import IRLabel, IRLiteral -from vyper.venom.context import IRContext +from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral +from vyper.venom.context import DataSection, IRContext from vyper.venom.function import IRFunction from vyper.venom.ir_node_to_venom import ir_node_to_venom from vyper.venom.passes import ( @@ -40,6 +40,24 @@ DEFAULT_OPT_LEVEL = OptimizationLevel.default() +def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSection]) -> None: + for data_section in data_sections: + fn = ctx.create_function(data_section.label.value) + + fn.clear_basic_blocks() + bb = IRBasicBlock(data_section.label, fn) + fn.append_basic_block(bb) + + for data_item in data_section.data_items: + if isinstance(data_item.data, IRLabel): + bb.append_instruction("db", data_item.data) + else: + # Convert bytes to IRHexString + assert isinstance(data_item.data, bytes) + hex_string = IRHexString(data_item.data) + bb.append_instruction("db", hex_string) + + bb.append_instruction("stop") def generate_assembly_experimental( venom_ctx: IRContext, optimize: OptimizationLevel = DEFAULT_OPT_LEVEL @@ -145,6 +163,8 @@ def generate_venom( ctx.append_data_section(IRLabel(section_name)) ctx.append_data_item(data) + convert_data_segment_to_function(ctx, ctx.data_segment) + for constname, value in constants.items(): ctx.add_constant(constname, value) diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 6f76ed0bdd..911870ce54 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -16,6 +16,7 @@ IRLiteral, IROperand, IRVariable, + IRHexString, ) from vyper.venom.context import IRContext from vyper.venom.function import IRFunction, IRParameter @@ -540,14 +541,30 @@ def _convert_ir_bb(fn, ir, symbols): return IRLabel(ir.args[0].value, True) elif ir.value == "data": label = IRLabel(ir.args[0].value, True) 
- ctx.append_data_section(label) + + # Create revert function first (if not already created) + if "revert" not in fn.ctx.functions: + revert_fn = fn.ctx.create_function("revert") + revert_fn.clear_basic_blocks() + revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) + revert_fn.append_basic_block(revert_bb) + revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + + data_fn = fn.ctx.create_function(label.value) + data_fn.clear_basic_blocks() + data_bb = IRBasicBlock(label, data_fn) + data_fn.append_basic_block(data_bb) + for c in ir.args[1:]: if isinstance(c.value, bytes): - ctx.append_data_item(c.value) + hex_string = IRHexString(c.value) + data_bb.append_instruction("db", hex_string) elif isinstance(c, IRnode): data = _convert_ir_bb(fn, c, symbols) assert isinstance(data, IRLabel) # help mypy - ctx.append_data_item(data) + data_bb.append_instruction("db", data) + + data_bb.append_instruction("stop") elif ir.value == "label": label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index c5d4530494..71f951c8f3 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -3,6 +3,7 @@ from lark import Lark, Transformer +from vyper.venom import convert_data_segment_to_function from vyper.venom.basicblock import ( IRBasicBlock, IRInstruction, @@ -145,11 +146,7 @@ def start(self, children) -> IRContext: name, address = global_label.children ctx.add_global_label(name, address) - # Process data segment - if data_segment: - ctx.data_segment = data_segment.children - - # Process functions + # Process functions first for fn_name, items in funcs: fn = ctx.create_function(fn_name) if ctx.entry_function is None: @@ -197,7 +194,12 @@ def start(self, children) -> IRContext: assert isinstance(instruction, IRInstruction) # help mypy bb.insert_instruction(instruction) - _set_last_var(fn) + # Process data segment after functions by converting it to a regular function + if data_segment: + 
self._add_revert_postamble_function(ctx) + convert_data_segment_to_function(ctx, data_segment.children) + + _set_last_var(fn) _set_last_label(ctx) return ctx @@ -250,7 +252,14 @@ def data_item(self, children) -> DataItem: item = item.replace("_", "") return DataItem(bytes.fromhex(item)) - + def _add_revert_postamble_function(self, ctx: IRContext) -> None: + fn = ctx.create_function("revert") + + fn.clear_basic_blocks() + bb = IRBasicBlock(IRLabel("revert"), fn) + fn.append_basic_block(bb) + + bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) def assignment(self, children) -> IRInstruction: to, value = children diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 3bad2da3f0..656a82956f 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -184,23 +184,7 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) - asm.extend(_REVERT_POSTAMBLE) - - # Append data segment - for data_section in self.ctx.data_segment: - label = data_section.label - asm_data_section: list[AssemblyInstruction] = [] - asm_data_section.append(DataHeader(_as_asm_symbol(label))) - for item in data_section.data_items: - data = item.data - if isinstance(data, IRLabel): - asm_data_section.append(DATA_ITEM(_as_asm_symbol(data))) - else: - assert isinstance(data, bytes) - asm_data_section.append(DATA_ITEM(data)) - - asm.extend(asm_data_section) - + asm.extend(_REVERT_POSTAMBLE) # FIXME FIXME FIXME if no_optimize is False: optimize_assembly(asm) @@ -430,6 +414,10 @@ def _generate_evm_for_instruction( operands = inst.operands[1:] elif opcode == "db": operands = [] + elif opcode == "revert": + # Filter out literals from revert operands for stack reordering + # since literals are handled directly in _emit_input_operands + operands = [op for op in inst.operands if not isinstance(op, IRLiteral)] else: operands = inst.operands From 
65c8a8fa86ce89cb287c969154e4b37670c9bd1b Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 23 Jun 2025 17:18:03 +0300 Subject: [PATCH 086/172] wip --- vyper/venom/__init__.py | 14 +++++++++----- vyper/venom/context.py | 10 +++++----- vyper/venom/venom_to_assembly.py | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 47dd23dc87..07deb274ee 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -40,14 +40,18 @@ DEFAULT_OPT_LEVEL = OptimizationLevel.default() -def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSection]) -> None: +def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSection]) -> None: + if len(data_sections) == 0: + return + + first_label = data_sections[0].label + fn = ctx.create_function(first_label.value) + fn.clear_basic_blocks() + for data_section in data_sections: - fn = ctx.create_function(data_section.label.value) - - fn.clear_basic_blocks() bb = IRBasicBlock(data_section.label, fn) fn.append_basic_block(bb) - + for data_item in data_section.data_items: if isinstance(data_item.data, IRLabel): bb.append_instruction("db", data_item.data) diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 969aefe29b..f9f4221b2f 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -122,10 +122,10 @@ def __repr__(self) -> str: s.append(IRFunction.__repr__(fn)) s.append("\n") - if len(self.data_segment) > 0: - s.append("data readonly {") - for data_section in self.data_segment: - s.append(textwrap.indent(DataSection.__str__(data_section), " ")) - s.append("}") + # if len(self.data_segment) > 0: + # s.append("data readonly {") + # for data_section in self.data_segment: + # s.append(textwrap.indent(DataSection.__str__(data_section), " ")) + # s.append("}") return "\n".join(s) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 656a82956f..2ce5eb7dbd 100644 
--- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -184,7 +184,7 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) - asm.extend(_REVERT_POSTAMBLE) # FIXME FIXME FIXME + # asm.extend(_REVERT_POSTAMBLE) # FIXME FIXME FIXME if no_optimize is False: optimize_assembly(asm) From 3726d61f7fdeccc9d3e0a74f586487ce187e2f32 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 23 Jun 2025 23:45:04 +0300 Subject: [PATCH 087/172] in function data --- vyper/compiler/phases.py | 24 +++++++++++++++++++----- vyper/venom/__init__.py | 2 +- vyper/venom/basicblock.py | 10 ++++++---- vyper/venom/ir_node_to_venom.py | 3 ++- vyper/venom/passes/simplify_cfg.py | 2 +- 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index f879426398..b1399c8b94 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -3,6 +3,7 @@ from pathlib import Path, PurePath from typing import Any, Optional +from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral import vyper.codegen.core as codegen from vyper import ast as vy_ast from vyper.ast import natspec @@ -260,18 +261,31 @@ def venom_runtime(self): @cached_property def venom_deploytime(self): - data_sections = {"runtime_begin": self.bytecode_runtime} - if self.bytecode_metadata is not None: - data_sections["cbor_metadata"] = self.bytecode_metadata - constants = { "runtime_codesize": len(self.bytecode_runtime), "immutables_len": self.compilation_target._metadata["type"].immutable_section_bytes, } venom_ctx = generate_venom( - self.ir_nodes, self.settings, constants=constants, data_sections=data_sections + self.ir_nodes, self.settings, constants=constants ) + + main_entry = venom_ctx.entry_function + + revert_bb = IRBasicBlock(IRLabel("revert"), main_entry) + main_entry.append_basic_block(revert_bb) + 
revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + + bb = IRBasicBlock(IRLabel("runtime_begin"), main_entry) + bb.is_volatile = True + main_entry.append_basic_block(bb) + bb.append_instruction("db", IRHexString(self.bytecode_runtime)) + + bb = IRBasicBlock(IRLabel("cbor_metadata"), main_entry) + bb.is_volatile = True + main_entry.append_basic_block(bb) + bb.append_instruction("db", IRHexString(self.bytecode_metadata)) + return venom_ctx @cached_property diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 07deb274ee..0e1327f72a 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -50,6 +50,7 @@ def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSec for data_section in data_sections: bb = IRBasicBlock(data_section.label, fn) + bb.is_volatile = True fn.append_basic_block(bb) for data_item in data_section.data_items: @@ -61,7 +62,6 @@ def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSec hex_string = IRHexString(data_item.data) bb.append_instruction("db", hex_string) - bb.append_instruction("stop") def generate_assembly_experimental( venom_ctx: IRContext, optimize: OptimizationLevel = DEFAULT_OPT_LEVEL diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 09fb942e30..6b7d731f20 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -14,7 +14,7 @@ from vyper.venom.function import IRFunction # instructions which can terminate a basic block -BB_TERMINATORS = frozenset(["jmp", "djmp", "jnz", "ret", "return", "revert", "stop", "sink"]) +BB_TERMINATORS = frozenset(["jmp", "djmp", "jnz", "ret", "return", "revert", "stop", "sink", "db"]) VOLATILE_INSTRUCTIONS = frozenset( [ @@ -492,17 +492,21 @@ class IRBasicBlock: label: IRLabel parent: IRFunction instructions: list[IRInstruction] + is_volatile: bool = False def __init__(self, label: IRLabel, parent: IRFunction) -> None: assert isinstance(label, IRLabel), "label must be an IRLabel" 
self.label = label self.parent = parent self.instructions = [] + self.is_volatile = False @property def out_bbs(self): assert self.is_terminated term = self.last_instruction + if term.opcode == "db": + return [] out_labels = term.get_label_operands() fn = self.parent return [fn.get_basic_block(label.name) for label in out_labels] @@ -523,8 +527,6 @@ def append_instruction( Returns the output variable if the instruction supports one """ - assert not self.is_terminated, self - if ret is None: ret = self.parent.get_next_variable() if opcode not in NO_OUTPUT_INSTRUCTIONS else None @@ -684,7 +686,7 @@ def copy(self) -> IRBasicBlock: def __repr__(self) -> str: printer = ir_printer.get() - s = f"{repr(self.label)}: ; OUT={[bb.label for bb in self.out_bbs]}\n" + s = f"{repr(self.label)}:\n" if printer and hasattr(printer, "_pre_block"): s += printer._pre_block(self) for inst in self.instructions: diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 911870ce54..7a19fee98d 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -553,6 +553,7 @@ def _convert_ir_bb(fn, ir, symbols): data_fn = fn.ctx.create_function(label.value) data_fn.clear_basic_blocks() data_bb = IRBasicBlock(label, data_fn) + data_bb.is_volatile = True data_fn.append_basic_block(data_bb) for c in ir.args[1:]: @@ -564,7 +565,7 @@ def _convert_ir_bb(fn, ir, symbols): assert isinstance(data, IRLabel) # help mypy data_bb.append_instruction("db", data) - data_bb.append_instruction("stop") + # data_bb.append_instruction("stop") elif ir.value == "label": label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() diff --git a/vyper/venom/passes/simplify_cfg.py b/vyper/venom/passes/simplify_cfg.py index 8994318df0..669880a3a7 100644 --- a/vyper/venom/passes/simplify_cfg.py +++ b/vyper/venom/passes/simplify_cfg.py @@ -96,7 +96,7 @@ def remove_unreachable_blocks(self) -> int: removed = set() for bb in list(self.function.get_basic_blocks()): - if not 
self.cfg.is_reachable(bb): + if not self.cfg.is_reachable(bb) and not bb.is_volatile: self.function.remove_basic_block(bb) removed.add(bb) From 288ec0e988f289b648ebc3eeab3a084a69631a92 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 00:28:46 +0300 Subject: [PATCH 088/172] wip --- vyper/venom/ir_node_to_venom.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 7a19fee98d..17c47fd486 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -144,6 +144,12 @@ def ir_node_to_venom(ir: IRnode, symbols: Optional[dict] = None) -> IRContext: symbols = symbols or {} _convert_ir_bb(fn, ir, symbols) + revert_fn = ctx.create_function("revert") + revert_fn.clear_basic_blocks() + revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) + revert_fn.append_basic_block(revert_bb) + revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + for fn in ctx.functions.values(): for bb in fn.get_basic_blocks(): bb.ensure_well_formed() @@ -542,14 +548,6 @@ def _convert_ir_bb(fn, ir, symbols): elif ir.value == "data": label = IRLabel(ir.args[0].value, True) - # Create revert function first (if not already created) - if "revert" not in fn.ctx.functions: - revert_fn = fn.ctx.create_function("revert") - revert_fn.clear_basic_blocks() - revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) - revert_fn.append_basic_block(revert_bb) - revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - data_fn = fn.ctx.create_function(label.value) data_fn.clear_basic_blocks() data_bb = IRBasicBlock(label, data_fn) From b9e6bcb809934c115607beac88187e84183c91cd Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 10:05:51 +0300 Subject: [PATCH 089/172] use ctx as temp --- vyper/venom/ir_node_to_venom.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git 
a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 17c47fd486..3f93f0eb88 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -144,11 +144,13 @@ def ir_node_to_venom(ir: IRnode, symbols: Optional[dict] = None) -> IRContext: symbols = symbols or {} _convert_ir_bb(fn, ir, symbols) - revert_fn = ctx.create_function("revert") - revert_fn.clear_basic_blocks() - revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) - revert_fn.append_basic_block(revert_bb) - revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + entry_fn = fn.ctx.entry_function + if not entry_fn.has_basic_block(IRLabel("revert")): + revert_fn = ctx.create_function("revert") + revert_fn.clear_basic_blocks() + revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) + revert_fn.append_basic_block(revert_bb) + revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) for fn in ctx.functions.values(): for bb in fn.get_basic_blocks(): @@ -548,22 +550,17 @@ def _convert_ir_bb(fn, ir, symbols): elif ir.value == "data": label = IRLabel(ir.args[0].value, True) - data_fn = fn.ctx.create_function(label.value) - data_fn.clear_basic_blocks() - data_bb = IRBasicBlock(label, data_fn) - data_bb.is_volatile = True - data_fn.append_basic_block(data_bb) - + ctx.append_data_section(label) + for c in ir.args[1:]: if isinstance(c.value, bytes): hex_string = IRHexString(c.value) - data_bb.append_instruction("db", hex_string) + ctx.append_data_item(hex_string) elif isinstance(c, IRnode): data = _convert_ir_bb(fn, c, symbols) assert isinstance(data, IRLabel) # help mypy - data_bb.append_instruction("db", data) + ctx.append_data_item(data) - # data_bb.append_instruction("stop") elif ir.value == "label": label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() From a2d61324057b14b860334e5d3d0aba2c0a0ef6ad Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 10:14:58 +0300 Subject: [PATCH 090/172] refactoring to clean up the 
transpilation from the rest of venom --- vyper/compiler/phases.py | 16 +++++++++-- vyper/venom/__init__.py | 51 --------------------------------- vyper/venom/ir_node_to_venom.py | 51 ++++++++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 55 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index b1399c8b94..d2e7279dce 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -3,6 +3,7 @@ from pathlib import Path, PurePath from typing import Any, Optional +from vyper.venom.ir_node_to_venom import generate_venom_from_ir from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral import vyper.codegen.core as codegen from vyper import ast as vy_ast @@ -25,7 +26,7 @@ from vyper.semantics.types.module import ModuleT from vyper.typing import StorageLayout from vyper.utils import ERC5202_PREFIX, sha256sum -from vyper.venom import generate_assembly_experimental, generate_venom +from vyper.venom import generate_assembly_experimental, run_passes_on from vyper.warnings import VyperWarning, vyper_warn DEFAULT_CONTRACT_PATH = PurePath("VyperContract.vy") @@ -256,7 +257,12 @@ def function_signatures(self) -> dict[str, ContractFunctionT]: @cached_property def venom_runtime(self): - runtime_venom = generate_venom(self.ir_runtime, self.settings) + runtime_venom = generate_venom_from_ir(self.ir_runtime, self.settings) + + optimize = self.settings.optimize + assert optimize is not None # help mypy + run_passes_on(runtime_venom, optimize) + return runtime_venom @cached_property @@ -266,10 +272,14 @@ def venom_deploytime(self): "immutables_len": self.compilation_target._metadata["type"].immutable_section_bytes, } - venom_ctx = generate_venom( + venom_ctx = generate_venom_from_ir( self.ir_nodes, self.settings, constants=constants ) + optimize = self.settings.optimize + assert optimize is not None # help mypy + run_passes_on(venom_ctx, optimize) + main_entry = venom_ctx.entry_function revert_bb = 
IRBasicBlock(IRLabel("revert"), main_entry) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 0e1327f72a..b03f2d1c26 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -13,7 +13,6 @@ from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral from vyper.venom.context import DataSection, IRContext from vyper.venom.function import IRFunction -from vyper.venom.ir_node_to_venom import ir_node_to_venom from vyper.venom.passes import ( CSE, SCCP, @@ -40,36 +39,12 @@ DEFAULT_OPT_LEVEL = OptimizationLevel.default() -def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSection]) -> None: - if len(data_sections) == 0: - return - - first_label = data_sections[0].label - fn = ctx.create_function(first_label.value) - fn.clear_basic_blocks() - - for data_section in data_sections: - bb = IRBasicBlock(data_section.label, fn) - bb.is_volatile = True - fn.append_basic_block(bb) - - for data_item in data_section.data_items: - if isinstance(data_item.data, IRLabel): - bb.append_instruction("db", data_item.data) - else: - # Convert bytes to IRHexString - assert isinstance(data_item.data, bytes) - hex_string = IRHexString(data_item.data) - bb.append_instruction("db", hex_string) - - def generate_assembly_experimental( venom_ctx: IRContext, optimize: OptimizationLevel = DEFAULT_OPT_LEVEL ) -> list[AssemblyInstruction]: compiler = VenomCompiler(venom_ctx) return compiler.generate_evm_assembly(optimize == OptimizationLevel.NONE) - def _run_passes(fn: IRFunction, optimize: OptimizationLevel, ac: IRAnalysesCache) -> None: # Run passes on Venom IR # TODO: Add support for optimization levels @@ -151,29 +126,3 @@ def run_passes_on(ctx: IRContext, optimize: OptimizationLevel) -> None: _run_passes(fn, optimize, ir_analyses[fn]) -def generate_venom( - ir: IRnode, - settings: Settings, - constants: dict[str, int] = None, - data_sections: dict[str, bytes] = None, -) -> IRContext: - # Convert "old" IR to "new" IR - 
constants = constants or {} - starting_symbols = {k: IRLiteral(v) for k, v in constants.items()} - ctx = ir_node_to_venom(ir, starting_symbols) - - data_sections = data_sections or {} - for section_name, data in data_sections.items(): - ctx.append_data_section(IRLabel(section_name)) - ctx.append_data_item(data) - - convert_data_segment_to_function(ctx, ctx.data_segment) - - for constname, value in constants.items(): - ctx.add_constant(constname, value) - - optimize = settings.optimize - assert optimize is not None # help mypy - run_passes_on(ctx, optimize) - - return ctx diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 3f93f0eb88..437cfc28da 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -7,6 +7,7 @@ from vyper.codegen.context import Alloca from vyper.codegen.ir_node import IRnode +from vyper.compiler.settings import Settings from vyper.evm.opcodes import get_opcodes from vyper.ir.compile_ir import _runtime_code_offsets from vyper.venom.basicblock import ( @@ -18,7 +19,7 @@ IRVariable, IRHexString, ) -from vyper.venom.context import IRContext +from vyper.venom.context import DataSection, IRContext from vyper.venom.function import IRFunction, IRParameter ENABLE_NEW_CALL_CONV = True @@ -129,6 +130,30 @@ def get_scratch_alloca_id() -> int: return _scratch_alloca_id +def generate_venom_from_ir( + ir: IRnode, + settings: Settings, + constants: dict[str, int] = None, + data_sections: dict[str, bytes] = None, +) -> IRContext: + # Convert "old" IR to "new" IR + constants = constants or {} + starting_symbols = {k: IRLiteral(v) for k, v in constants.items()} + ctx = ir_node_to_venom(ir, starting_symbols) + + data_sections = data_sections or {} + for section_name, data in data_sections.items(): + ctx.append_data_section(IRLabel(section_name)) + ctx.append_data_item(data) + + convert_data_segment_to_function(ctx, ctx.data_segment) + + for constname, value in constants.items(): + 
ctx.add_constant(constname, value) + + return ctx + + # convert IRnode directly to venom def ir_node_to_venom(ir: IRnode, symbols: Optional[dict] = None) -> IRContext: _ = ir.unique_symbols # run unique symbols check @@ -759,3 +784,27 @@ def _convert_ir_opcode(fn: IRFunction, ir: IRnode, symbols: SymbolTable) -> None if isinstance(arg, IRnode): inst_args.append(_convert_ir_bb(fn, arg, symbols)) fn.get_basic_block().append_instruction(opcode, *inst_args) + + +def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSection]) -> None: + if len(data_sections) == 0: + return + + first_label = data_sections[0].label + fn = ctx.create_function(first_label.value) + fn.clear_basic_blocks() + + for data_section in data_sections: + bb = IRBasicBlock(data_section.label, fn) + bb.is_volatile = True + fn.append_basic_block(bb) + + for data_item in data_section.data_items: + if isinstance(data_item.data, IRLabel): + bb.append_instruction("db", data_item.data) + else: + # Convert bytes to IRHexString + assert isinstance(data_item.data, bytes) + hex_string = IRHexString(data_item.data) + bb.append_instruction("db", hex_string) + \ No newline at end of file From 047dd6b55a85d6f3c03a5ca48d4acd9a2cd1d498 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 10:23:51 +0300 Subject: [PATCH 091/172] refactor cont --- vyper/compiler/phases.py | 14 +++++++++++++- vyper/venom/ir_node_to_venom.py | 10 ---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index d2e7279dce..d097788ab6 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -3,7 +3,7 @@ from pathlib import Path, PurePath from typing import Any, Optional -from vyper.venom.ir_node_to_venom import generate_venom_from_ir +from vyper.venom.ir_node_to_venom import convert_data_segment_to_function, generate_venom_from_ir from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral import 
vyper.codegen.core as codegen from vyper import ast as vy_ast @@ -259,6 +259,16 @@ def function_signatures(self) -> dict[str, ContractFunctionT]: def venom_runtime(self): runtime_venom = generate_venom_from_ir(self.ir_runtime, self.settings) + entry_fn = runtime_venom.entry_function + if not entry_fn.has_basic_block(IRLabel("revert")): + revert_fn = runtime_venom.create_function("revert") + revert_fn.clear_basic_blocks() + revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) + revert_fn.append_basic_block(revert_bb) + revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + + convert_data_segment_to_function(runtime_venom, runtime_venom.data_segment) + optimize = self.settings.optimize assert optimize is not None # help mypy run_passes_on(runtime_venom, optimize) @@ -296,6 +306,8 @@ def venom_deploytime(self): main_entry.append_basic_block(bb) bb.append_instruction("db", IRHexString(self.bytecode_metadata)) + convert_data_segment_to_function(venom_ctx, venom_ctx.data_segment) + return venom_ctx @cached_property diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 437cfc28da..d5fd620928 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -146,8 +146,6 @@ def generate_venom_from_ir( ctx.append_data_section(IRLabel(section_name)) ctx.append_data_item(data) - convert_data_segment_to_function(ctx, ctx.data_segment) - for constname, value in constants.items(): ctx.add_constant(constname, value) @@ -169,14 +167,6 @@ def ir_node_to_venom(ir: IRnode, symbols: Optional[dict] = None) -> IRContext: symbols = symbols or {} _convert_ir_bb(fn, ir, symbols) - entry_fn = fn.ctx.entry_function - if not entry_fn.has_basic_block(IRLabel("revert")): - revert_fn = ctx.create_function("revert") - revert_fn.clear_basic_blocks() - revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) - revert_fn.append_basic_block(revert_bb) - revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - for fn in 
ctx.functions.values(): for bb in fn.get_basic_blocks(): bb.ensure_well_formed() From ddb1c4e25fe7cee52ec7e2a71c8969ac1f48e979 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 10:48:24 +0300 Subject: [PATCH 092/172] generate for all volatile basic blocks (the revert block is one of them) --- vyper/compiler/phases.py | 1 + vyper/venom/venom_to_assembly.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index d097788ab6..1a97c7944a 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -293,6 +293,7 @@ def venom_deploytime(self): main_entry = venom_ctx.entry_function revert_bb = IRBasicBlock(IRLabel("revert"), main_entry) + revert_bb.is_volatile = True main_entry.append_basic_block(revert_bb) revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 2ce5eb7dbd..69e532e316 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -184,6 +184,10 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) + for bb in fn.get_basic_blocks(): + if bb.is_volatile: + self._generate_evm_for_basicblock_r(asm, bb, StackModel()) + # asm.extend(_REVERT_POSTAMBLE) # FIXME FIXME FIXME if no_optimize is False: optimize_assembly(asm) From 94f995c4b7e85082da54508e991f15db2b4b8a39 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 10:49:02 +0300 Subject: [PATCH 093/172] remove no longer needed code --- vyper/venom/venom_to_assembly.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 69e532e316..6fc065de23 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -188,7 +188,6 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr 
if bb.is_volatile: self._generate_evm_for_basicblock_r(asm, bb, StackModel()) - # asm.extend(_REVERT_POSTAMBLE) # FIXME FIXME FIXME if no_optimize is False: optimize_assembly(asm) From 89d774a9f873b3d4856c91961d577d60c0d6257b Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 10:57:18 +0300 Subject: [PATCH 094/172] small test fix --- tests/unit/compiler/venom/test_venom_to_assembly.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/unit/compiler/venom/test_venom_to_assembly.py b/tests/unit/compiler/venom/test_venom_to_assembly.py index 77710e98ec..eabe990af3 100644 --- a/tests/unit/compiler/venom/test_venom_to_assembly.py +++ b/tests/unit/compiler/venom/test_venom_to_assembly.py @@ -1,4 +1,6 @@ from vyper.compiler.phases import generate_bytecode +from vyper.evm.assembler.core import PUSHLABEL +from vyper.evm.assembler.symbols import Label from vyper.venom.parser import parse_venom from vyper.venom.venom_to_assembly import VenomCompiler @@ -50,7 +52,4 @@ def test_global_vars(): """ ctx = parse_venom(code) asm = VenomCompiler(ctx).generate_evm_assembly() - bytecode, _ = generate_bytecode(asm) - print(f"0x{bytecode.hex()}") - print(asm) - assert asm == ["SWAP2", "PUSH1", 117, "POP", "MSTORE", "MSTORE", "JUMP"] \ No newline at end of file + assert asm == [Label("main"), "PUSH1", 1, "PUSH1", 2, "POP", PUSHLABEL(Label("main")), "ADD", "JUMP"] \ No newline at end of file From 114f2061c6004fa68c1213572bcb4a4741823e22 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 10:57:51 +0300 Subject: [PATCH 095/172] remove unused import from parser.py --- vyper/venom/parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 71f951c8f3..398e8bf13e 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -3,7 +3,6 @@ from lark import Lark, Transformer -from vyper.venom import convert_data_segment_to_function from vyper.venom.basicblock import ( 
IRBasicBlock, IRInstruction, From 45b4f0aa889d554f94e8684546021b36769a0254 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 11:04:35 +0300 Subject: [PATCH 096/172] test update --- tests/functional/venom/parser/test_parsing.py | 61 +++++++++++-------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index 3e73480e5e..2d134f0ebf 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -104,20 +104,24 @@ def test_data_section(): expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) entry_fn.get_basic_block("entry").append_instruction("stop") - expected_ctx.data_segment = [ - DataSection( - IRLabel("selector_buckets"), - [ - DataItem(IRLabel("selector_bucket_0")), - DataItem(IRLabel("fallback")), - DataItem(IRLabel("selector_bucket_2")), - DataItem(IRLabel("selector_bucket_3")), - DataItem(IRLabel("fallback")), - DataItem(IRLabel("selector_bucket_5")), - DataItem(IRLabel("selector_bucket_6")), - ], - ) - ] + expected_ctx.add_function(revert_fn := IRFunction(IRLabel("revert"))) + revert_fn.clear_basic_blocks() + revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) + revert_fn.append_basic_block(revert_bb) + revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + + expected_ctx.add_function(data_fn := IRFunction(IRLabel("selector_buckets"))) + data_fn.clear_basic_blocks() + data_bb = IRBasicBlock(IRLabel("selector_buckets"), data_fn) + data_fn.append_basic_block(data_bb) + data_bb.append_instruction("db", IRLabel("selector_bucket_0")) + data_bb.append_instruction("db", IRLabel("fallback")) + data_bb.append_instruction("db", IRLabel("selector_bucket_2")) + data_bb.append_instruction("db", IRLabel("selector_bucket_3")) + data_bb.append_instruction("db", IRLabel("fallback")) + data_bb.append_instruction("db", IRLabel("selector_bucket_5")) + 
data_bb.append_instruction("db", IRLabel("selector_bucket_6")) + data_bb.append_instruction("stop") assert_ctx_eq(parsed_ctx, expected_ctx) @@ -234,18 +238,23 @@ def test_multi_function_and_data(): check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - expected_ctx.data_segment = [ - DataSection( - IRLabel("selector_buckets"), - [ - DataItem(IRLabel("selector_bucket_0")), - DataItem(IRLabel("fallback")), - DataItem(IRLabel("selector_bucket_2")), - DataItem(IRLabel("selector_bucket_3")), - DataItem(IRLabel("selector_bucket_6")), - ], - ) - ] + # Revert function is automatically created with data segments + expected_ctx.add_function(revert_fn := IRFunction(IRLabel("revert"))) + revert_fn.clear_basic_blocks() + revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) + revert_fn.append_basic_block(revert_bb) + revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + + # Data segment is now converted to a function + expected_ctx.add_function(data_fn := IRFunction(IRLabel("selector_buckets"))) + data_fn.clear_basic_blocks() + data_bb = IRBasicBlock(IRLabel("selector_buckets"), data_fn) + data_fn.append_basic_block(data_bb) + data_bb.append_instruction("db", IRLabel("selector_bucket_0")) + data_bb.append_instruction("db", IRLabel("fallback")) + data_bb.append_instruction("db", IRLabel("selector_bucket_2")) + data_bb.append_instruction("db", IRLabel("selector_bucket_3")) + data_bb.append_instruction("db", IRLabel("selector_bucket_6")) assert_ctx_eq(parsed_ctx, expected_ctx) From dbbfa238d85bb2fa4d458e20a7a12f448af38b8f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 12:02:43 +0300 Subject: [PATCH 097/172] wip --- vyper/compiler/phases.py | 8 ++++---- vyper/venom/analysis/liveness.py | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 1a97c7944a..0f1bc2149e 100644 --- 
a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -286,10 +286,6 @@ def venom_deploytime(self): self.ir_nodes, self.settings, constants=constants ) - optimize = self.settings.optimize - assert optimize is not None # help mypy - run_passes_on(venom_ctx, optimize) - main_entry = venom_ctx.entry_function revert_bb = IRBasicBlock(IRLabel("revert"), main_entry) @@ -309,6 +305,10 @@ def venom_deploytime(self): convert_data_segment_to_function(venom_ctx, venom_ctx.data_segment) + optimize = self.settings.optimize + assert optimize is not None # help mypy + run_passes_on(venom_ctx, optimize) + return venom_ctx @cached_property diff --git a/vyper/venom/analysis/liveness.py b/vyper/venom/analysis/liveness.py index b10e596107..5d84c587e3 100644 --- a/vyper/venom/analysis/liveness.py +++ b/vyper/venom/analysis/liveness.py @@ -27,6 +27,9 @@ def analyze(self): self.inst_to_liveness[inst] = OrderedSet() worklist = deque(self.cfg.dfs_post_walk) + for bb in self.function.get_basic_blocks(): + if bb.is_volatile: + worklist.append(bb) while len(worklist) > 0: changed = False From 9faf37d792cd69fe348ffa36bc84a3b33425c0db Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 15:22:52 +0300 Subject: [PATCH 098/172] wip --- vyper/evm/assembler/core.py | 32 ++++++++++++++++++++++++++++---- vyper/venom/venom_to_assembly.py | 6 ++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 507e044f83..12ad864e74 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -19,6 +19,30 @@ def num_to_bytearray(x): return o +class Label: + def __init__(self, label: str): + assert isinstance(label, str) + self.label = label + + def __repr__(self): + return f"LABEL {self.label}" + + def __eq__(self, other): + if not isinstance(other, Label): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) + +class JUMPDEST: + def __init__(self, 
label: Label): + assert isinstance(label, Label), label + self.label = label + + def __repr__(self): + return f"JUMPDEST {self.label.label}" + @dataclass class DataHeader: label: Label @@ -133,7 +157,7 @@ def is_ofst(assembly_item): AssemblyInstruction = ( - str | TaggedInstruction | int | PUSHLABEL | Label | PUSH_OFST | DATA_ITEM | DataHeader | CONST + str | TaggedInstruction | int | PUSHLABEL | JUMPDEST | PUSH_OFST | DATA_ITEM | DataHeader | CONST ) @@ -218,8 +242,8 @@ def resolve_symbols( continue # CONST declarations do not go into bytecode # update pc - if isinstance(item, Label): - _add_to_symbol_map(symbol_map, item, pc) + if isinstance(item, JUMPDEST): + _add_to_symbol_map(symbol_map, item.label, pc) pc += 1 # jumpdest elif isinstance(item, DataHeader): @@ -393,7 +417,7 @@ def _assembly_to_evm( bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) ret.extend(bytecode) - elif isinstance(item, Label): + elif isinstance(item, JUMPDEST): jumpdest_opcode = get_opcodes()["JUMPDEST"][0] assert jumpdest_opcode is not None # help mypy ret.append(jumpdest_opcode) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 6fc065de23..ab99930e9b 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -4,6 +4,7 @@ from vyper.evm.assembler.core import ( DATA_ITEM, + JUMPDEST, PUSH, PUSH_OFST, PUSHLABEL, @@ -113,9 +114,6 @@ ] ) -_REVERT_POSTAMBLE = [Label("revert"), *PUSH(0), "DUP1", "REVERT"] - - def apply_line_numbers(inst: IRInstruction, asm) -> list[str]: ret = [] for op in asm: @@ -326,7 +324,7 @@ def _generate_evm_for_basicblock_r( asm = [] # assembly entry point into the block - asm.append(_as_asm_symbol(basicblock.label)) + asm.append(JUMPDEST(_as_asm_symbol(basicblock.label))) fn = basicblock.parent if basicblock == fn.entry: From df944c1168d5b0dcc159c66cac804ebf98e548e1 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 15:33:05 +0300 Subject: [PATCH 099/172] 
fix optimizer --- vyper/evm/assembler/optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index e8cd02e161..f251fdf76f 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -1,4 +1,4 @@ -from vyper.evm.assembler.core import DATA_ITEM, PUSHLABEL, DataHeader, Label, is_symbol +from vyper.evm.assembler.core import DATA_ITEM, JUMPDEST, PUSHLABEL, DataHeader, Label, is_symbol from vyper.evm.constants import COMMUTATIVE_OPS from vyper.exceptions import CompilerPanic @@ -14,7 +14,7 @@ def _prune_unreachable_code(assembly): if assembly[i] in _TERMINAL_OPS: # find the next jumpdest or data section for j in range(i + 1, len(assembly)): - next_is_reachable = isinstance(assembly[j], (Label, DataHeader)) + next_is_reachable = isinstance(assembly[j], (JUMPDEST, DataHeader)) if next_is_reachable: break else: From e39dc8b8294b32ac9137cbf2bc49c9a505551a00 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 15:47:22 +0300 Subject: [PATCH 100/172] tmp --- vyper/evm/assembler/optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index f251fdf76f..49163734c4 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -178,7 +178,7 @@ def _prune_unused_jumpdests(assembly): # delete jumpdests that aren't used i = 0 while i < len(assembly): - if is_symbol(assembly[i]) and assembly[i] not in used_jumpdests: + if isinstance(assembly[i], JUMPDEST) and assembly[i].label not in used_jumpdests: changed = True del assembly[i] else: From c036dc46b69e5c15f065f32722a8cac8addc8a94 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 16:06:42 +0300 Subject: [PATCH 101/172] make optimizer aware of the new system --- vyper/evm/assembler/core.py | 5 +++++ vyper/evm/assembler/optimizer.py | 28 
++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 12ad864e74..ac1c62badf 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -250,6 +250,9 @@ def resolve_symbols( # Don't increment pc as the symbol itself doesn't go into code _add_to_symbol_map(symbol_map, item.label, pc) + elif isinstance(item, Label): + _add_to_symbol_map(symbol_map, item, pc) + elif isinstance(item, PUSHLABEL): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits @@ -410,6 +413,8 @@ def _assembly_to_evm( continue # CONST things do not show up in bytecode elif isinstance(item, DataHeader): continue # DataHeader does not show up in bytecode + elif isinstance(item, Label): + continue # Label does not show up in bytecode elif isinstance(item, PUSHLABEL): # push a symbol to stack diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 49163734c4..5a89467c51 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -14,7 +14,7 @@ def _prune_unreachable_code(assembly): if assembly[i] in _TERMINAL_OPS: # find the next jumpdest or data section for j in range(i + 1, len(assembly)): - next_is_reachable = isinstance(assembly[j], (JUMPDEST, DataHeader)) + next_is_reachable = isinstance(assembly[j], (JUMPDEST, DataHeader, Label, DATA_ITEM)) if next_is_reachable: break else: @@ -163,24 +163,36 @@ def _merge_iszero(assembly): def _prune_unused_jumpdests(assembly): changed = False - used_jumpdests: set[Label] = set() + used_as_jumpdests: set[Label] = set() + used_as_labels: set[Label] = set() # find all used jumpdests - for item in assembly: + for i, item in enumerate(assembly): if isinstance(item, PUSHLABEL): - used_jumpdests.add(item.label) + # only add if the next item is a jump instruction + if i + 1 < len(assembly) and assembly[i + 1] in ("JUMP", "JUMPI"): + used_as_jumpdests.add(item.label) + else: + 
used_as_labels.add(item.label) if isinstance(item, DATA_ITEM) and isinstance(item.data, Label): # add symbols used in data sections as they are likely # used for a jumptable. - used_jumpdests.add(item.data) + used_as_jumpdests.add(item.data) # delete jumpdests that aren't used i = 0 while i < len(assembly): - if isinstance(assembly[i], JUMPDEST) and assembly[i].label not in used_jumpdests: - changed = True - del assembly[i] + if isinstance(assembly[i], JUMPDEST): + if assembly[i].label in used_as_jumpdests: + i += 1 + elif assembly[i].label in used_as_labels: + changed = True + assembly[i] = assembly[i].label + i += 1 + else: + changed = True + del assembly[i] else: i += 1 From 26baba03007f8e290f937cd1de972a57183e13dd Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 24 Jun 2025 16:16:13 +0300 Subject: [PATCH 102/172] and finally dataheader is out --- .../builtins/codegen/test_create_functions.py | 4 ++-- vyper/compiler/output.py | 2 +- vyper/evm/assembler/core.py | 18 ++---------------- vyper/evm/assembler/optimizer.py | 4 ++-- vyper/ir/compile_ir.py | 7 +++---- vyper/venom/venom_to_assembly.py | 1 - 6 files changed, 10 insertions(+), 26 deletions(-) diff --git a/tests/functional/builtins/codegen/test_create_functions.py b/tests/functional/builtins/codegen/test_create_functions.py index 3b7200f218..e8244b86e8 100644 --- a/tests/functional/builtins/codegen/test_create_functions.py +++ b/tests/functional/builtins/codegen/test_create_functions.py @@ -6,7 +6,7 @@ import vyper.ir.compile_ir as compile_ir from tests.utils import ZERO_ADDRESS from vyper.compiler import compile_code -from vyper.ir.compile_ir import DATA_ITEM, PUSH, PUSHLABEL, DataHeader, Label +from vyper.ir.compile_ir import DATA_ITEM, PUSH, PUSHLABEL, Label from vyper.utils import EIP_170_LIMIT, ERC5202_PREFIX, checksum_encode, keccak256 @@ -302,7 +302,7 @@ def test(code_ofst: uint256) -> address: *PUSH(initcode_len), *PUSH(0), "RETURN", - DataHeader(Label("end")), + Label("end"),
DATA_ITEM(b"\x00" * initcode_len), ] bytecode, _ = compile_ir.assembly_to_evm(asm) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 18736f7b72..82dc97ddf2 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -337,7 +337,7 @@ def _build_asm(asm_list): output_string = "__entry__:" in_push = 0 for item in asm_list: - if isinstance(item, (compile_ir.Label, compile_ir.DataHeader)): + if isinstance(item, compile_ir.Label): output_string += f"\n\n{item}:" continue diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index ac1c62badf..a6fbf6dbe1 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -43,14 +43,6 @@ def __init__(self, label: Label): def __repr__(self): return f"JUMPDEST {self.label.label}" -@dataclass -class DataHeader: - label: Label - - def __repr__(self): - return f"DATA {self.label.label}" - - class PUSHLABEL: def __init__(self, label: Label): assert isinstance(label, Label), label @@ -157,7 +149,7 @@ def is_ofst(assembly_item): AssemblyInstruction = ( - str | TaggedInstruction | int | PUSHLABEL | JUMPDEST | PUSH_OFST | DATA_ITEM | DataHeader | CONST + str | TaggedInstruction | int | PUSHLABEL | JUMPDEST | PUSH_OFST | DATA_ITEM | CONST ) @@ -246,10 +238,6 @@ def resolve_symbols( _add_to_symbol_map(symbol_map, item.label, pc) pc += 1 # jumpdest - elif isinstance(item, DataHeader): - # Don't increment pc as the symbol itself doesn't go into code - _add_to_symbol_map(symbol_map, item.label, pc) - elif isinstance(item, Label): _add_to_symbol_map(symbol_map, item, pc) @@ -336,7 +324,7 @@ def note_breakpoint(line_number_map, pc, item): def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: ret = [] for item in assembly: - if isinstance(item, DataHeader): + if isinstance(item, Label): ret.append(0) continue if len(ret) == 0: @@ -411,8 +399,6 @@ def _assembly_to_evm( continue # skippable opcodes elif isinstance(item, CONST): continue # CONST things do not 
show up in bytecode - elif isinstance(item, DataHeader): - continue # DataHeader does not show up in bytecode elif isinstance(item, Label): continue # Label does not show up in bytecode diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 5a89467c51..11bf5d2de7 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -1,4 +1,4 @@ -from vyper.evm.assembler.core import DATA_ITEM, JUMPDEST, PUSHLABEL, DataHeader, Label, is_symbol +from vyper.evm.assembler.core import DATA_ITEM, JUMPDEST, PUSHLABEL, Label, is_symbol from vyper.evm.constants import COMMUTATIVE_OPS from vyper.exceptions import CompilerPanic @@ -14,7 +14,7 @@ def _prune_unreachable_code(assembly): if assembly[i] in _TERMINAL_OPS: # find the next jumpdest or data section for j in range(i + 1, len(assembly)): - next_is_reachable = isinstance(assembly[j], (JUMPDEST, DataHeader, Label, DATA_ITEM)) + next_is_reachable = isinstance(assembly[j], (JUMPDEST, Label, DATA_ITEM)) if next_is_reachable: break else: diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 41e3e674a6..37f16586a4 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -18,7 +18,6 @@ PUSH_OFST, PUSHLABEL, AssemblyInstruction, - DataHeader, Label, TaggedInstruction, assembly_to_evm, @@ -537,7 +536,7 @@ def _height_of(varname): o.extend(["RETURN"]) - self.data_segments.append([DataHeader(runtime_begin), DATA_ITEM(runtime_bytecode)]) + self.data_segments.append([Label(runtime_begin), DATA_ITEM(runtime_bytecode)]) if self.compiler_metadata is not None: # we should issue the cbor-encoded metadata. 
@@ -548,7 +547,7 @@ def _height_of(varname): immutables_len, ) - segment: list[AssemblyInstruction] = [DataHeader(Label("cbor_metadata"))] + segment: list[AssemblyInstruction] = [Label("cbor_metadata")] segment.append(DATA_ITEM(bytecode_suffix)) self.data_segments.append(segment) @@ -649,7 +648,7 @@ def _height_of(varname): if code.value == "data": assert isinstance(code.args[0].value, str) # help mypy - data_header = DataHeader(Label(code.args[0].value)) + data_header = Label(code.args[0].value) data_items = [] for c in code.args[1:]: diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index ab99930e9b..787e7a23fa 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -9,7 +9,6 @@ PUSH_OFST, PUSHLABEL, AssemblyInstruction, - DataHeader, Label, TaggedInstruction, ) From dc863a1717da798a5e86f03a4a107e1731ff6799 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 10:35:05 +0300 Subject: [PATCH 103/172] tags for parser --- vyper/venom/parser.py | 103 ++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 398e8bf13e..5bbfca97b6 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -12,7 +12,7 @@ IRVariable, IRHexString, ) -from vyper.venom.context import DataItem, DataSection, IRContext +from vyper.venom.context import IRContext from vyper.venom.function import IRFunction VENOM_GRAMMAR = """ @@ -28,7 +28,7 @@ # Allow multiple comment styles COMMENT: ";" /[^\\n]*/ | "//" /[^\\n]*/ | "#" /[^\\n]*/ - start: (global_label | function)* data_segment? + start: (global_label | function)* # Global label definitions with optional address override global_label: label_name ":" CONST @@ -37,7 +37,10 @@ block_content: (label_decl | statement)* - label_decl: (IDENT | ESCAPED_STRING) ":" ("@" CONST)? NEWLINE+ + label_decl: (IDENT | ESCAPED_STRING) ":" ("@" CONST)? ("[" tag_list "]")? 
NEWLINE+ + + tag_list: tag ("," tag)* + tag: IDENT statement: (assignment | instruction) NEWLINE+ assignment: VAR_IDENT "=" expr @@ -57,10 +60,6 @@ label_name: IDENT | ESCAPED_STRING label_ref: "@" (IDENT | ESCAPED_STRING) - data_segment: "data" "readonly" "{" data_section* "}" - data_section: label_name ":" NEWLINE+ data_item+ - data_item: DB (HEXSTR | label_ref) NEWLINE+ - DOUBLE_QUOTE: "\\"" IDENT: (DIGIT|LETTER|"_")+ DB: "db" @@ -109,34 +108,30 @@ def __init__(self, children: list) -> None: self.children = children -class _DataSegment(_TypedItem): +class _GlobalLabel(_TypedItem): pass -class _GlobalLabel(_TypedItem): - pass class _LabelDecl: """Represents a block declaration in the parse tree.""" - def __init__(self, label: str, address: Optional[int] = None) -> None: + def __init__(self, label: str, address: Optional[int] = None, tags: Optional[list[str]] = None) -> None: self.label = label self.address = address + self.tags = tags or [] class VenomTransformer(Transformer): def start(self, children) -> IRContext: ctx = IRContext() - # Separate global labels, functions, and data segments + # Separate global labels and functions global_labels = [] funcs = [] - data_segment = None for child in children: if isinstance(child, _GlobalLabel): global_labels.append(child) - elif isinstance(child, _DataSegment): - data_segment = child else: funcs.append(child) @@ -145,7 +140,7 @@ def start(self, children) -> IRContext: name, address = global_label.children ctx.add_global_label(name, address) - # Process functions first + # Process functions for fn_name, items in funcs: fn = ctx.create_function(fn_name) if ctx.entry_function is None: @@ -161,15 +156,17 @@ def start(self, children) -> IRContext: # the next label or end of function. 
current_block_label: Optional[str] = None current_block_address: Optional[int] = None + current_block_tags: list[str] = [] current_block_instructions: list[IRInstruction] = [] - blocks: list[tuple[str, Optional[int], list[IRInstruction]]] = [] + blocks: list[tuple[str, Optional[int], list[IRInstruction], list[str]]] = [] for item in items: if isinstance(item, _LabelDecl): if current_block_label is not None: - blocks.append((current_block_label, current_block_address, current_block_instructions)) + blocks.append((current_block_label, current_block_address, current_block_instructions, current_block_tags)) current_block_label = item.label current_block_address = item.address + current_block_tags = item.tags current_block_instructions = [] elif isinstance(item, IRInstruction): if current_block_label is None: @@ -177,28 +174,28 @@ def start(self, children) -> IRContext: current_block_instructions.append(item) if current_block_label is not None: - blocks.append((current_block_label, current_block_address, current_block_instructions)) + blocks.append((current_block_label, current_block_address, current_block_instructions, current_block_tags)) for block_data in blocks: - # All blocks now have: (block_name, address, instructions) - block_name, address, instructions = block_data + # All blocks now have: (block_name, address, instructions, tags) + block_name, address, instructions, tags = block_data if address is not None: bb = IRBasicBlock(IRLabel(block_name, True, address), fn) else: bb = IRBasicBlock(IRLabel(block_name, True), fn) + # Set is_volatile if "pinned" tag is present + if "pinned" in tags: + bb.is_volatile = True + fn.append_basic_block(bb) for instruction in instructions: assert isinstance(instruction, IRInstruction) # help mypy bb.insert_instruction(instruction) - # Process data segment after functions by converting it to a regular function - if data_segment: - self._add_revert_postamble_function(ctx) - convert_data_segment_to_function(ctx, 
data_segment.children) + _set_last_var(fn) - _set_last_var(fn) _set_last_label(ctx) return ctx @@ -216,50 +213,24 @@ def block_content(self, children) -> list: return children def label_decl(self, children) -> _LabelDecl: - # children[0] is the label, optional address, then NEWLINE tokens + # children[0] is the label, optional address, optional tags, then NEWLINE tokens label = _unescape(str(children[0])) address = None - if len(children) > 1 and isinstance(children[1], IRLiteral): - address = children[1].value - return _LabelDecl(label, address) + tags = [] + + # Process children after the label + for child in children[1:]: + if isinstance(child, IRLiteral): + address = child.value + elif isinstance(child, list): # tag_list returns a list + tags = child + + return _LabelDecl(label, address, tags) def statement(self, children) -> IRInstruction: # children[0] is the instruction/assignment, rest are NEWLINE tokens return children[0] - def data_segment(self, children) -> _DataSegment: - return _DataSegment(children) - - def data_section(self, children) -> DataSection: - label = IRLabel(children[0], True) - # skip NEWLINE tokens and collect DataItems - data_items = [child for child in children[1:] if isinstance(child, DataItem)] - return DataSection(label, data_items) - - def data_item(self, children) -> DataItem: - # children[0] is the DB "IDENT", children[1] is the data content, rest are NEWLINE tokens - assert children[0] == "db", f"Expected 'db', got {children[0]}" - item = children[1] - if isinstance(item, IRLabel): - return DataItem(item) - - # handle hex strings - assert isinstance(item, str) - assert item.startswith('x"') - assert item.endswith('"') - item = item.removeprefix('x"').removesuffix('"') - item = item.replace("_", "") - return DataItem(bytes.fromhex(item)) - - def _add_revert_postamble_function(self, ctx: IRContext) -> None: - fn = ctx.create_function("revert") - - fn.clear_basic_blocks() - bb = IRBasicBlock(IRLabel("revert"), fn) - 
fn.append_basic_block(bb) - - bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - def assignment(self, children) -> IRInstruction: to, value = children if isinstance(value, IRInstruction): @@ -346,6 +317,12 @@ def DB(self, val) -> str: def HEXSTR(self, val) -> str: return val.value + def tag_list(self, children) -> list[str]: + return children + + def tag(self, children) -> str: + return str(children[0]) + def parse_venom(source: str) -> IRContext: tree = VENOM_PARSER.parse(source) From 97e80111edfb5cb5e97237fa7645e97e1d7c196a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 10:35:13 +0300 Subject: [PATCH 104/172] remove dead code --- vyper/venom/context.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/vyper/venom/context.py b/vyper/venom/context.py index f9f4221b2f..e859663fd1 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -122,10 +122,4 @@ def __repr__(self) -> str: s.append(IRFunction.__repr__(fn)) s.append("\n") - # if len(self.data_segment) > 0: - # s.append("data readonly {") - # for data_section in self.data_segment: - # s.append(textwrap.indent(DataSection.__str__(data_section), " ")) - # s.append("}") - return "\n".join(s) From 5b67b0cea2223c5f11778a8f726542d343f0020e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 10:39:36 +0300 Subject: [PATCH 105/172] is_volatile -> is_pinned and parser wip --- vyper/compiler/phases.py | 6 +++--- vyper/venom/analysis/liveness.py | 2 +- vyper/venom/basicblock.py | 15 ++++++++++++--- vyper/venom/ir_node_to_venom.py | 2 +- vyper/venom/parser.py | 2 +- vyper/venom/passes/simplify_cfg.py | 2 +- vyper/venom/venom_to_assembly.py | 2 +- 7 files changed, 20 insertions(+), 11 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 0f1bc2149e..17c4885e27 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -289,17 +289,17 @@ def venom_deploytime(self): main_entry = venom_ctx.entry_function revert_bb = 
IRBasicBlock(IRLabel("revert"), main_entry) - revert_bb.is_volatile = True + revert_bb.is_pinned = True main_entry.append_basic_block(revert_bb) revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) bb = IRBasicBlock(IRLabel("runtime_begin"), main_entry) - bb.is_volatile = True + bb.is_pinned = True main_entry.append_basic_block(bb) bb.append_instruction("db", IRHexString(self.bytecode_runtime)) bb = IRBasicBlock(IRLabel("cbor_metadata"), main_entry) - bb.is_volatile = True + bb.is_pinned = True main_entry.append_basic_block(bb) bb.append_instruction("db", IRHexString(self.bytecode_metadata)) diff --git a/vyper/venom/analysis/liveness.py b/vyper/venom/analysis/liveness.py index 5d84c587e3..69cbfa7e62 100644 --- a/vyper/venom/analysis/liveness.py +++ b/vyper/venom/analysis/liveness.py @@ -28,7 +28,7 @@ def analyze(self): worklist = deque(self.cfg.dfs_post_walk) for bb in self.function.get_basic_blocks(): - if bb.is_volatile: + if bb.is_pinned: worklist.append(bb) while len(worklist) > 0: diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 6b7d731f20..6aad3ba869 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -492,14 +492,17 @@ class IRBasicBlock: label: IRLabel parent: IRFunction instructions: list[IRInstruction] - is_volatile: bool = False + + # is_pinned is used to indicate if the basic block is pinned and cannot + # be optimized out. 
+ is_pinned: bool = False def __init__(self, label: IRLabel, parent: IRFunction) -> None: assert isinstance(label, IRLabel), "label must be an IRLabel" self.label = label self.parent = parent self.instructions = [] - self.is_volatile = False + self.is_pinned = False @property def out_bbs(self): @@ -686,7 +689,13 @@ def copy(self) -> IRBasicBlock: def __repr__(self) -> str: printer = ir_printer.get() - s = f"{repr(self.label)}:\n" + s = f"{repr(self.label)}:" + + if self.is_pinned: + s += " [pinned]\n" + else: + s += "\n" + if printer and hasattr(printer, "_pre_block"): s += printer._pre_block(self) for inst in self.instructions: diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index d5fd620928..844022f1fa 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -786,7 +786,7 @@ def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSec for data_section in data_sections: bb = IRBasicBlock(data_section.label, fn) - bb.is_volatile = True + bb.is_pinned = True fn.append_basic_block(bb) for data_item in data_section.data_items: diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 5bbfca97b6..a9c7c88ba2 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -186,7 +186,7 @@ def start(self, children) -> IRContext: # Set is_volatile if "pinned" tag is present if "pinned" in tags: - bb.is_volatile = True + bb.is_pinned = True fn.append_basic_block(bb) diff --git a/vyper/venom/passes/simplify_cfg.py b/vyper/venom/passes/simplify_cfg.py index 669880a3a7..35b305392e 100644 --- a/vyper/venom/passes/simplify_cfg.py +++ b/vyper/venom/passes/simplify_cfg.py @@ -96,7 +96,7 @@ def remove_unreachable_blocks(self) -> int: removed = set() for bb in list(self.function.get_basic_blocks()): - if not self.cfg.is_reachable(bb) and not bb.is_volatile: + if not self.cfg.is_reachable(bb) and not bb.is_pinned: self.function.remove_basic_block(bb) removed.add(bb) diff --git 
a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 787e7a23fa..1cf9131888 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -182,7 +182,7 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel()) for bb in fn.get_basic_blocks(): - if bb.is_volatile: + if bb.is_pinned: self._generate_evm_for_basicblock_r(asm, bb, StackModel()) if no_optimize is False: From 21f65123cc3fb662a00ba627ad2b360d7457427b Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 10:53:11 +0300 Subject: [PATCH 106/172] remove unnecessary assertion in IRBasicBlock class --- vyper/venom/basicblock.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 6aad3ba869..3c9665cc4e 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -571,7 +571,6 @@ def insert_instruction(self, instruction: IRInstruction, index: Optional[int] = assert isinstance(instruction, IRInstruction), "instruction must be an IRInstruction" if index is None: - assert not self.is_terminated, (self, instruction) index = len(self.instructions) instruction.parent = self fn = self.parent From d8e524e4417dd561b8eacb45ec5a2399fd812581 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 11:25:22 +0300 Subject: [PATCH 107/172] instead of asserting, ignore non-data-emitting instructions when counting --- vyper/evm/assembler/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index a6fbf6dbe1..2a7b9841dc 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -330,7 +330,8 @@ def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: if len(ret) == 0: # haven't yet seen a data header continue - assert isinstance(item, DATA_ITEM) + if not isinstance(item, 
DATA_ITEM): + continue if is_symbol(item.data): ret[-1] += SYMBOL_SIZE elif isinstance(item.data, bytes): From 3cce52317e7b9cfec92fb68f4f89056605b7fa9c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 11:29:22 +0300 Subject: [PATCH 108/172] temp test --- tests/functional/venom/parser/test_parsing.py | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index 2d134f0ebf..e304e15e31 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -121,7 +121,6 @@ def test_data_section(): data_bb.append_instruction("db", IRLabel("fallback")) data_bb.append_instruction("db", IRLabel("selector_bucket_5")) data_bb.append_instruction("db", IRLabel("selector_bucket_6")) - data_bb.append_instruction("stop") assert_ctx_eq(parsed_ctx, expected_ctx) @@ -449,4 +448,36 @@ def test_labels_with_addresses_used_in_function(): add_inst = other_bb.instructions[0] assert add_inst.opcode == "add" - assert add_inst.operands[0].value == "my_global" \ No newline at end of file + assert add_inst.operands[0].value == "my_global" + + +def test_labels_with_tags(): + source = """ + function main { + start: + nop + revert: @0x100 [pinned] + revert 0, 0 + special: [tag1, pinned, tag2] + nop + normal: @0x200 + stop + } + """ + + parsed_ctx = parse_venom(source) + fn = list(parsed_ctx.functions.values())[0] + + start_bb = fn.get_basic_block("start") + assert not start_bb.is_pinned, "start block should not be volatile" + + revert_bb = fn.get_basic_block("revert") + assert revert_bb.is_pinned, "revert block should be volatile due to pinned tag" + assert revert_bb.label.address == 0x100, "revert block should have address 0x100" + + special_bb = fn.get_basic_block("special") + assert special_bb.is_pinned, "special block should be volatile due to pinned tag" + + normal_bb = fn.get_basic_block("normal") + 
assert not normal_bb.is_pinned, "normal block should not be volatile" + assert normal_bb.label.address == 0x200, "normal block should have address 0x200" \ No newline at end of file From fe979c8609ee19e986e5fe143a42ebaa34b37e7d Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 13:08:13 +0300 Subject: [PATCH 109/172] fix assert_unreachable --- vyper/venom/venom_to_assembly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 1cf9131888..5e23b4bec7 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -568,7 +568,7 @@ def _generate_evm_for_instruction( assembly.extend(["ISZERO", PUSHLABEL(Label("revert")), "JUMPI"]) elif opcode == "assert_unreachable": end_symbol = self.mklabel("reachable") - assembly.extend([PUSHLABEL(end_symbol), "JUMPI", "INVALID", end_symbol]) + assembly.extend([PUSHLABEL(end_symbol), "JUMPI", "INVALID", JUMPDEST(end_symbol)]) elif opcode == "iload": addr = inst.operands[0] mem_deploy_end = self.ctx.constants["mem_deploy_end"] From 4b52a04563892e030a649d9347b1df0c545bcce5 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 13:35:03 +0300 Subject: [PATCH 110/172] fix in old pipeline --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 37f16586a4..92aa56e1d2 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -536,7 +536,7 @@ def _height_of(varname): o.extend(["RETURN"]) - self.data_segments.append([Label(runtime_begin), DATA_ITEM(runtime_bytecode)]) + self.data_segments.append([runtime_begin, DATA_ITEM(runtime_bytecode)]) if self.compiler_metadata is not None: # we should issue the cbor-encoded metadata. 
From 6a9244bf40d91fe0c4ab258d69002d0da7500aa7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 25 Jun 2025 13:36:13 +0300 Subject: [PATCH 111/172] jumpdest detection hack --- vyper/evm/assembler/core.py | 7 ++++--- vyper/evm/assembler/optimizer.py | 6 ++++-- vyper/venom/venom_to_assembly.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 2a7b9841dc..442fc05109 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -21,7 +21,7 @@ def num_to_bytearray(x): class Label: def __init__(self, label: str): - assert isinstance(label, str) + assert isinstance(label, str), f"invalid label {type(label)} {label}" self.label = label def __repr__(self): @@ -322,10 +322,11 @@ def note_breakpoint(line_number_map, pc, item): # predict what length of an assembly [data] node will be in bytecode def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: - ret = [] + ret = [0] for item in assembly: if isinstance(item, Label): - ret.append(0) + if len(ret) > 0 and ret[-1] > 0: + ret.append(0) continue if len(ret) == 0: # haven't yet seen a data header diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 11bf5d2de7..17056458ed 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -169,8 +169,10 @@ def _prune_unused_jumpdests(assembly): # find all used jumpdests for i, item in enumerate(assembly): if isinstance(item, PUSHLABEL): - # only add if the next item is a jump instruction - if i + 1 < len(assembly) and assembly[i + 1] in ("JUMP", "JUMPI"): + # only add if the next item is a jump instruction, or a pushlabel + # which happens when pushing return labels for inline calls. + # TODO: this is a hack and we should have a better way to handle this. 
+ if i + 1 < len(assembly) and (assembly[i + 1] in ("JUMP", "JUMPI") or isinstance(assembly[i + 1], PUSHLABEL)): used_as_jumpdests.add(item.label) else: used_as_labels.add(item.label) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 5e23b4bec7..90cd4e2a85 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -542,7 +542,7 @@ def _generate_evm_for_instruction( ), f"invoke target must be a label (is ${type(target)} ${target})" return_label = self.mklabel("return_label") assembly.extend( - [PUSHLABEL(return_label), PUSHLABEL(_as_asm_symbol(target)), "JUMP", return_label] + [PUSHLABEL(return_label), PUSHLABEL(_as_asm_symbol(target)), "JUMP", JUMPDEST(return_label)] ) elif opcode == "ret": assembly.append("JUMP") From 9ebf29bafb1501afa5ba9203269e653273a47934 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 12:10:53 +0300 Subject: [PATCH 112/172] PUSHLABELJUMPDEST assembly instruction for pushing jump targets --- vyper/evm/assembler/core.py | 28 ++++++++++++++++++++++++---- vyper/evm/assembler/optimizer.py | 14 +++++--------- vyper/venom/venom_to_assembly.py | 15 ++++++++------- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 442fc05109..77a6dc6c64 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -59,6 +59,26 @@ def __eq__(self, other): def __hash__(self): return hash(self.label) +class PUSHLABELJUMPDEST: + """ + This is a special case of PUSHLABEL that is used to push a label + that is used in a jump or return address. This is used to allow + the optimizer to remove jumpdests that are not used. 
+ """ + def __init__(self, label: Label): + assert isinstance(label, Label), label + self.label = label + + def __repr__(self): + return f"PUSHLABELJUMPDEST {self.label.label}" + + def __eq__(self, other): + if not isinstance(other, PUSHLABELJUMPDEST): + return False + return self.label == other.label + + def __hash__(self): + return hash(self.label) # push the result of an addition (which might be resolvable at compile-time) class PUSH_OFST: @@ -125,11 +145,11 @@ def PUSH_N(x, n): def JUMP(label: Label): - return [PUSHLABEL(label), "JUMP"] + return [PUSHLABELJUMPDEST(label), "JUMP"] def JUMPI(label: Label): - return [PUSHLABEL(label), "JUMPI"] + return [PUSHLABELJUMPDEST(label), "JUMPI"] def mkdebug(pc_debugger, ast_source): @@ -241,7 +261,7 @@ def resolve_symbols( elif isinstance(item, Label): _add_to_symbol_map(symbol_map, item, pc) - elif isinstance(item, PUSHLABEL): + elif isinstance(item, (PUSHLABEL, PUSHLABELJUMPDEST)): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits elif isinstance(item, PUSH_OFST): @@ -404,7 +424,7 @@ def _assembly_to_evm( elif isinstance(item, Label): continue # Label does not show up in bytecode - elif isinstance(item, PUSHLABEL): + elif isinstance(item, (PUSHLABEL, PUSHLABELJUMPDEST)): # push a symbol to stack label = item.label bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 17056458ed..29609dcf25 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -1,4 +1,4 @@ -from vyper.evm.assembler.core import DATA_ITEM, JUMPDEST, PUSHLABEL, Label, is_symbol +from vyper.evm.assembler.core import DATA_ITEM, JUMPDEST, PUSHLABEL, PUSHLABELJUMPDEST, Label, is_symbol from vyper.evm.constants import COMMUTATIVE_OPS from vyper.exceptions import CompilerPanic @@ -167,15 +167,11 @@ def _prune_unused_jumpdests(assembly): used_as_labels: set[Label] = set() # find all used jumpdests - for i, item in 
enumerate(assembly): + for item in assembly: if isinstance(item, PUSHLABEL): - # only add if the next item is a jump instruction, or a pushlabel - # which happens when pushing return labels for inline calls. - # TODO: this is a hack and we should have a better way to handle this. - if i + 1 < len(assembly) and (assembly[i + 1] in ("JUMP", "JUMPI") or isinstance(assembly[i + 1], PUSHLABEL)): - used_as_jumpdests.add(item.label) - else: - used_as_labels.add(item.label) + used_as_labels.add(item.label) + elif isinstance(item, PUSHLABELJUMPDEST): + used_as_jumpdests.add(item.label) if isinstance(item, DATA_ITEM) and isinstance(item.data, Label): # add symbols used in data sections as they are likely diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 90cd4e2a85..09cd68a085 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -8,6 +8,7 @@ PUSH, PUSH_OFST, PUSHLABEL, + PUSHLABELJUMPDEST, AssemblyInstruction, Label, TaggedInstruction, @@ -248,7 +249,7 @@ def _emit_input_operands( # invoke emits the actual instruction itself so we don't need # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": - assembly.append(PUSHLABEL(_as_asm_symbol(op))) + assembly.append(PUSHLABELJUMPDEST(_as_asm_symbol(op))) stack.push(op) continue @@ -516,19 +517,19 @@ def _generate_evm_for_instruction( elif opcode == "jnz": # jump if not zero if_nonzero_label, if_zero_label = inst.get_label_operands() - assembly.append(PUSHLABEL(_as_asm_symbol(if_nonzero_label))) + assembly.append(PUSHLABELJUMPDEST(_as_asm_symbol(if_nonzero_label))) assembly.append("JUMPI") # make sure the if_zero_label will be optimized out # assert if_zero_label == next(iter(inst.parent.cfg_out)).label - assembly.append(PUSHLABEL(_as_asm_symbol(if_zero_label))) + assembly.append(PUSHLABELJUMPDEST(_as_asm_symbol(if_zero_label))) assembly.append("JUMP") elif opcode == "jmp": (target,) = inst.operands assert isinstance(target, 
IRLabel) - assembly.append(PUSHLABEL(_as_asm_symbol(target))) + assembly.append(PUSHLABELJUMPDEST(_as_asm_symbol(target))) assembly.append("JUMP") elif opcode == "djmp": assert isinstance( @@ -542,7 +543,7 @@ def _generate_evm_for_instruction( ), f"invoke target must be a label (is ${type(target)} ${target})" return_label = self.mklabel("return_label") assembly.extend( - [PUSHLABEL(return_label), PUSHLABEL(_as_asm_symbol(target)), "JUMP", JUMPDEST(return_label)] + [PUSHLABELJUMPDEST(return_label), PUSHLABELJUMPDEST(_as_asm_symbol(target)), "JUMP", JUMPDEST(return_label)] ) elif opcode == "ret": assembly.append("JUMP") @@ -565,10 +566,10 @@ def _generate_evm_for_instruction( ] ) elif opcode == "assert": - assembly.extend(["ISZERO", PUSHLABEL(Label("revert")), "JUMPI"]) + assembly.extend(["ISZERO", PUSHLABELJUMPDEST(Label("revert")), "JUMPI"]) elif opcode == "assert_unreachable": end_symbol = self.mklabel("reachable") - assembly.extend([PUSHLABEL(end_symbol), "JUMPI", "INVALID", JUMPDEST(end_symbol)]) + assembly.extend([PUSHLABELJUMPDEST(end_symbol), "JUMPI", "INVALID", JUMPDEST(end_symbol)]) elif opcode == "iload": addr = inst.operands[0] mem_deploy_end = self.ctx.constants["mem_deploy_end"] From 7181fa9373702c8a97f54a6f36f063100c36decf Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 12:27:39 +0300 Subject: [PATCH 113/172] more data to separate function so we can place them at the bottom --- vyper/compiler/phases.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 17c4885e27..e6d44965e4 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -293,14 +293,17 @@ def venom_deploytime(self): main_entry.append_basic_block(revert_bb) revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - bb = IRBasicBlock(IRLabel("runtime_begin"), main_entry) + data_fn = venom_ctx.create_function("data") + data_fn.clear_basic_blocks() + + bb = 
IRBasicBlock(IRLabel("runtime_begin"), data_fn) bb.is_pinned = True - main_entry.append_basic_block(bb) + data_fn.append_basic_block(bb) bb.append_instruction("db", IRHexString(self.bytecode_runtime)) - bb = IRBasicBlock(IRLabel("cbor_metadata"), main_entry) + bb = IRBasicBlock(IRLabel("cbor_metadata"), data_fn) bb.is_pinned = True - main_entry.append_basic_block(bb) + data_fn.append_basic_block(bb) bb.append_instruction("db", IRHexString(self.bytecode_metadata)) convert_data_segment_to_function(venom_ctx, venom_ctx.data_segment) From fbb2b13f391e03a2cdb2c21abce660d542468bdd Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 12:40:09 +0300 Subject: [PATCH 114/172] cleaner output --- vyper/compiler/output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 82dc97ddf2..d36a381392 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -16,7 +16,7 @@ from vyper.typing import StorageLayout from vyper.utils import safe_relpath from vyper.warnings import ContractSizeLimit, vyper_warn - +from vyper.evm.assembler.core import Label, JUMPDEST def build_ast_dict(compiler_data: CompilerData) -> dict: ast_dict = { @@ -337,7 +337,7 @@ def _build_asm(asm_list): output_string = "__entry__:" in_push = 0 for item in asm_list: - if isinstance(item, compile_ir.Label): + if isinstance(item, (Label, JUMPDEST)): output_string += f"\n\n{item}:" continue From 84ad20e2235feb06588bd802c331c0a909b7841d Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 12:58:37 +0300 Subject: [PATCH 115/172] asm optimizer fixes --- vyper/evm/assembler/optimizer.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 29609dcf25..40c3383959 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -30,17 +30,17 @@ def 
_prune_unreachable_code(assembly): def _prune_inefficient_jumps(assembly): - # prune sequences `PUSHLABEL x JUMP LABEL x` to `LABEL x` + # prune sequences `PUSHLABELJUMPDEST x JUMP LABEL x` to `LABEL x` changed = False i = 0 while i < len(assembly) - 2: if ( - isinstance(assembly[i], PUSHLABEL) + isinstance(assembly[i], PUSHLABELJUMPDEST) and assembly[i + 1] == "JUMP" - and is_symbol(assembly[i + 2]) - and assembly[i + 2] == assembly[i].label + and isinstance(assembly[i + 2], (Label, JUMPDEST)) + and assembly[i + 2].label == assembly[i].label ): - # delete PUSHLABEL x JUMP + # delete PUSHLABELJUMPDEST x JUMP changed = True del assembly[i : i + 2] else: @@ -51,15 +51,15 @@ def _prune_inefficient_jumps(assembly): def _optimize_inefficient_jumps(assembly): # optimize sequences - # `PUSHLABEL common JUMPI PUSHLABEL x JUMP LABEL common` - # to `ISZERO PUSHLABEL x JUMPI LABEL common` + # `PUSHLABELJUMPDEST common JUMPI PUSHLABELJUMPDEST x JUMP LABEL common` + # to `ISZERO PUSHLABELJUMPDEST x JUMPI LABEL common` changed = False i = 0 while i < len(assembly) - 4: if ( - isinstance(assembly[i], PUSHLABEL) + isinstance(assembly[i], PUSHLABELJUMPDEST) and assembly[i + 1] == "JUMPI" - and isinstance(assembly[i + 2], PUSHLABEL) + and isinstance(assembly[i + 2], PUSHLABELJUMPDEST) and assembly[i + 3] == "JUMP" and isinstance(assembly[i + 4], Label) and assembly[i].label == assembly[i + 4] @@ -94,17 +94,17 @@ def _merge_jumpdests(assembly): if new_symbol != current_symbol: for j in range(len(assembly)): if ( - isinstance(assembly[j], PUSHLABEL) + isinstance(assembly[j], (PUSHLABEL, PUSHLABELJUMPDEST)) and assembly[j].label == current_symbol ): assembly[j].label = new_symbol changed = True - elif isinstance(assembly[i + 1], PUSHLABEL) and assembly[i + 2] == "JUMP": - # LABEL x PUSHLABEL y JUMP - # replace all instances of PUSHLABEL x with PUSHLABEL y + elif isinstance(assembly[i + 1], PUSHLABELJUMPDEST) and assembly[i + 2] == "JUMP": + # LABEL x PUSHLABELJUMPDEST y JUMP + # 
replace all instances of PUSHLABELJUMPDEST x with PUSHLABELJUMPDEST y new_symbol = assembly[i + 1].label for j in range(len(assembly)): - if isinstance(assembly[j], PUSHLABEL) and assembly[j].label == current_symbol: + if isinstance(assembly[j], PUSHLABELJUMPDEST) and assembly[j].label == current_symbol: assembly[j].label = new_symbol changed = True @@ -149,7 +149,7 @@ def _merge_iszero(assembly): # but it could also just be a no-op before JUMPI. if ( assembly[i : i + 2] == ["ISZERO", "ISZERO"] - and isinstance(assembly[i + 2], PUSHLABEL) + and isinstance(assembly[i + 2], PUSHLABELJUMPDEST) and assembly[i + 3] == "JUMPI" ): changed = True From 067ce293360d401e8a2e07cc4caf134aea24c3bc Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 13:11:46 +0300 Subject: [PATCH 116/172] replace PUSHLABELJUMPDEST with PUSHLABEL for generic operands --- vyper/venom/venom_to_assembly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 09cd68a085..f9bc7ad598 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -249,7 +249,7 @@ def _emit_input_operands( # invoke emits the actual instruction itself so we don't need # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": - assembly.append(PUSHLABELJUMPDEST(_as_asm_symbol(op))) + assembly.append(PUSHLABEL(_as_asm_symbol(op))) stack.push(op) continue From da27681af334e56213334d82104f0624a5895baf Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 13:35:40 +0300 Subject: [PATCH 117/172] fix test --- tests/functional/venom/test_venom_label_variables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/functional/venom/test_venom_label_variables.py b/tests/functional/venom/test_venom_label_variables.py index 0f34f073d6..98e0d1aa40 100644 --- a/tests/functional/venom/test_venom_label_variables.py +++ 
b/tests/functional/venom/test_venom_label_variables.py @@ -74,6 +74,8 @@ def test_labels_as_variables(): %52 = mload 64 %53 = sub %36, %52 return %52, %53 + revert: [pinned] + revert 0,0 } """ ctx = parse_venom(code) From 340f34925290dde1908f6f2d8693c5f0dad4a051 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 13:50:59 +0300 Subject: [PATCH 118/172] refactor `get_data_segment_lengths()` to be easier to follow --- vyper/evm/assembler/core.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 77a6dc6c64..563950e94f 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -342,25 +342,33 @@ def note_breakpoint(line_number_map, pc, item): # predict what length of an assembly [data] node will be in bytecode def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: - ret = [0] + segments = [] + current_segment_length = 0 + for item in assembly: if isinstance(item, Label): - if len(ret) > 0 and ret[-1] > 0: - ret.append(0) - continue - if len(ret) == 0: - # haven't yet seen a data header + if current_segment_length > 0: + segments.append(current_segment_length) + current_segment_length = 0 continue + if not isinstance(item, DATA_ITEM): + # Only DATA_ITEM contributes to segment length continue + + # Add to current segment length if is_symbol(item.data): - ret[-1] += SYMBOL_SIZE + current_segment_length += SYMBOL_SIZE elif isinstance(item.data, bytes): - ret[-1] += len(item.data) + current_segment_length += len(item.data) else: # pragma: nocover raise ValueError(f"invalid data {type(item)} {item}") - - return ret + + # Add the final segment if it has data + if current_segment_length > 0: + segments.append(current_segment_length) + + return segments def _compile_data_item(item: DATA_ITEM, symbol_map: dict[SymbolKey, int]) -> bytes: From c54766287e2e9e57f5cce0692ee948576c208eca Mon Sep 17 00:00:00 2001 
From: Harry Kalogirou Date: Thu, 26 Jun 2025 14:04:42 +0300 Subject: [PATCH 119/172] fix test --- tests/unit/compiler/venom/test_venom_to_assembly.py | 6 ++---- vyper/evm/assembler/core.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/unit/compiler/venom/test_venom_to_assembly.py b/tests/unit/compiler/venom/test_venom_to_assembly.py index eabe990af3..9b092c45ba 100644 --- a/tests/unit/compiler/venom/test_venom_to_assembly.py +++ b/tests/unit/compiler/venom/test_venom_to_assembly.py @@ -1,6 +1,4 @@ -from vyper.compiler.phases import generate_bytecode -from vyper.evm.assembler.core import PUSHLABEL -from vyper.evm.assembler.symbols import Label +from vyper.evm.assembler.core import Label, PUSHLABEL from vyper.venom.parser import parse_venom from vyper.venom.venom_to_assembly import VenomCompiler @@ -52,4 +50,4 @@ def test_global_vars(): """ ctx = parse_venom(code) asm = VenomCompiler(ctx).generate_evm_assembly() - assert asm == [Label("main"), "PUSH1", 1, "PUSH1", 2, "POP", PUSHLABEL(Label("main")), "ADD", "JUMP"] \ No newline at end of file + assert asm == [Label("global_var"), "PUSH1", 1, "PUSH1", 2, "POP", PUSHLABEL(Label("global_var")), "ADD", "JUMP"] \ No newline at end of file diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 563950e94f..f84dee5647 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -45,7 +45,7 @@ def __repr__(self): class PUSHLABEL: def __init__(self, label: Label): - assert isinstance(label, Label), label + assert isinstance(label, Label), f"invalid label {type(label)} {label}" self.label = label def __repr__(self): From 330cd8ecca87d1320dac321f6ea1d1fcdca6b71f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 14:10:00 +0300 Subject: [PATCH 120/172] fix tests in syntax and that revert is no longer automatic --- tests/functional/venom/parser/test_parsing.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git 
a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index e304e15e31..be34b0146d 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -87,7 +87,7 @@ def test_data_section(): stop } - data readonly { + function selector_buckets { selector_buckets: db @selector_bucket_0 db @fallback @@ -104,12 +104,6 @@ def test_data_section(): expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) entry_fn.get_basic_block("entry").append_instruction("stop") - expected_ctx.add_function(revert_fn := IRFunction(IRLabel("revert"))) - revert_fn.clear_basic_blocks() - revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) - revert_fn.append_basic_block(revert_bb) - revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - expected_ctx.add_function(data_fn := IRFunction(IRLabel("selector_buckets"))) data_fn.clear_basic_blocks() data_bb = IRBasicBlock(IRLabel("selector_buckets"), data_fn) @@ -201,7 +195,7 @@ def test_multi_function_and_data(): revert 0, 0 } - data readonly { + function selector_buckets { selector_buckets: db @selector_bucket_0 db @fallback @@ -237,13 +231,6 @@ def test_multi_function_and_data(): check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - # Revert function is automatically created with data segments - expected_ctx.add_function(revert_fn := IRFunction(IRLabel("revert"))) - revert_fn.clear_basic_blocks() - revert_bb = IRBasicBlock(IRLabel("revert"), revert_fn) - revert_fn.append_basic_block(revert_bb) - revert_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) - # Data segment is now converted to a function expected_ctx.add_function(data_fn := IRFunction(IRLabel("selector_buckets"))) data_fn.clear_basic_blocks() From 5071a7afae65b6d6a0be4d1a5ab0785cafa5a539 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 
14:18:46 +0300 Subject: [PATCH 121/172] fix test --- tests/functional/codegen/test_selector_table_stability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/codegen/test_selector_table_stability.py b/tests/functional/codegen/test_selector_table_stability.py index a1f58d6852..1a4c9ac64e 100644 --- a/tests/functional/codegen/test_selector_table_stability.py +++ b/tests/functional/codegen/test_selector_table_stability.py @@ -14,7 +14,7 @@ def test_dense_jumptable_stability(): # test that the selector table data is stable across different runs # (xdist should provide different PYTHONHASHSEEDs). - expected_asm = """DATA BUCKET_HEADERS:\n DATABYTES 0b42\n DATALABEL bucket_0\n DATABYTES 0a\n DATABYTES 2b8d\n DATALABEL bucket_1\n DATABYTES 0c\n DATABYTES 0085\n DATALABEL bucket_2\n DATABYTES 08\n\nDATA bucket_1:\n DATABYTES d8eea1e8\n DATALABEL external 6 foo6()3639517672\n DATABYTES 05\n DATABYTES d29ee0f9\n DATALABEL external 0 foo0()3533627641\n DATABYTES 05\n DATABYTES 05f1e05f\n DATALABEL external 2 foo2()99737695\n DATABYTES 05\n DATABYTES 9109b47b\n DATALABEL external 23 foo23()2433332347\n DATABYTES 05\n DATABYTES 6e70337f\n DATALABEL external 11 foo11()1852846975\n DATABYTES 05\n DATABYTES 26f596f9\n DATALABEL external 13 foo13()653629177\n DATABYTES 05\n DATABYTES 046761eb\n DATALABEL external 14 foo14()73884139\n DATABYTES 05\n DATABYTES 8906adc6\n DATALABEL external 17 foo17()2298916294\n DATABYTES 05\n DATABYTES e425acd1\n DATALABEL external 4 foo4()3827674321\n DATABYTES 05\n DATABYTES 796a01ac\n DATALABEL external 7 foo7()2036990380\n DATABYTES 05\n DATABYTES f1e64be5\n DATALABEL external 29 foo29()4058401765\n DATABYTES 05\n DATABYTES d28958b8\n DATALABEL external 3 foo3()3532216504\n DATABYTES 05\n\nDATA bucket_2:\n DATABYTES 0670ff6a\n DATALABEL external 25 foo25()108068714\n DATABYTES 05\n DATABYTES 96349949\n DATALABEL external 24 foo24()2520029513\n DATABYTES 05\n DATABYTES 7381e7c1\n DATALABEL external 10 
foo10()1937893313\n DATABYTES 05\n DATABYTES 85adc131\n DATALABEL external 28 foo28()2242756913\n DATABYTES 05\n DATABYTES fa22b1ed\n DATALABEL external 5 foo5()4196577773\n DATABYTES 05\n DATABYTES 41e75b05\n DATALABEL external 22 foo22()1105681157\n DATABYTES 05\n DATABYTES d38955e8\n DATALABEL external 1 foo1()3548993000\n DATABYTES 05\n DATABYTES 684cf8f3\n DATALABEL external 20 foo20()1749874931\n DATABYTES 05\n\nDATA bucket_0:\n DATABYTES eed91de3\n DATALABEL external 9 foo9()4007206371\n DATABYTES 05\n DATABYTES 61bc1c68\n DATALABEL external 16 foo16()1639717992\n DATABYTES 05\n DATABYTES d32aa70c\n DATALABEL external 21 foo21()3542787852\n DATABYTES 05\n DATABYTES 186947d9\n DATALABEL external 19 foo19()409552857\n DATABYTES 05\n DATABYTES 0af1f97f\n DATALABEL external 18 foo18()183630207\n DATABYTES 05\n DATABYTES 29dad760\n DATALABEL external 27 foo27()702207840\n DATABYTES 05\n DATABYTES 32f6aada\n DATALABEL external 12 foo12()855026394\n DATABYTES 05\n DATABYTES beb505f5\n DATALABEL external 15 foo15()3199534581\n DATABYTES 05\n DATABYTES fca75fe6\n DATALABEL external 8 foo8()4238827494\n DATABYTES 05\n DATABYTES 1b124338\n DATALABEL external 26 foo26()454181688\n DATABYTES 05""" # noqa: E501 + expected_asm = """LABEL BUCKET_HEADERS:\n DATABYTES 0b42\n DATALABEL bucket_0\n DATABYTES 0a\n DATABYTES 2b8d\n DATALABEL bucket_1\n DATABYTES 0c\n DATABYTES 0085\n DATALABEL bucket_2\n DATABYTES 08\n\nLABEL bucket_1:\n DATABYTES d8eea1e8\n DATALABEL external 6 foo6()3639517672\n DATABYTES 05\n DATABYTES d29ee0f9\n DATALABEL external 0 foo0()3533627641\n DATABYTES 05\n DATABYTES 05f1e05f\n DATALABEL external 2 foo2()99737695\n DATABYTES 05\n DATABYTES 9109b47b\n DATALABEL external 23 foo23()2433332347\n DATABYTES 05\n DATABYTES 6e70337f\n DATALABEL external 11 foo11()1852846975\n DATABYTES 05\n DATABYTES 26f596f9\n DATALABEL external 13 foo13()653629177\n DATABYTES 05\n DATABYTES 046761eb\n DATALABEL external 14 foo14()73884139\n DATABYTES 05\n DATABYTES 
8906adc6\n DATALABEL external 17 foo17()2298916294\n DATABYTES 05\n DATABYTES e425acd1\n DATALABEL external 4 foo4()3827674321\n DATABYTES 05\n DATABYTES 796a01ac\n DATALABEL external 7 foo7()2036990380\n DATABYTES 05\n DATABYTES f1e64be5\n DATALABEL external 29 foo29()4058401765\n DATABYTES 05\n DATABYTES d28958b8\n DATALABEL external 3 foo3()3532216504\n DATABYTES 05\n\nLABEL bucket_2:\n DATABYTES 0670ff6a\n DATALABEL external 25 foo25()108068714\n DATABYTES 05\n DATABYTES 96349949\n DATALABEL external 24 foo24()2520029513\n DATABYTES 05\n DATABYTES 7381e7c1\n DATALABEL external 10 foo10()1937893313\n DATABYTES 05\n DATABYTES 85adc131\n DATALABEL external 28 foo28()2242756913\n DATABYTES 05\n DATABYTES fa22b1ed\n DATALABEL external 5 foo5()4196577773\n DATABYTES 05\n DATABYTES 41e75b05\n DATALABEL external 22 foo22()1105681157\n DATABYTES 05\n DATABYTES d38955e8\n DATALABEL external 1 foo1()3548993000\n DATABYTES 05\n DATABYTES 684cf8f3\n DATALABEL external 20 foo20()1749874931\n DATABYTES 05\n\nLABEL bucket_0:\n DATABYTES eed91de3\n DATALABEL external 9 foo9()4007206371\n DATABYTES 05\n DATABYTES 61bc1c68\n DATALABEL external 16 foo16()1639717992\n DATABYTES 05\n DATABYTES d32aa70c\n DATALABEL external 21 foo21()3542787852\n DATABYTES 05\n DATABYTES 186947d9\n DATALABEL external 19 foo19()409552857\n DATABYTES 05\n DATABYTES 0af1f97f\n DATALABEL external 18 foo18()183630207\n DATABYTES 05\n DATABYTES 29dad760\n DATALABEL external 27 foo27()702207840\n DATABYTES 05\n DATABYTES 32f6aada\n DATALABEL external 12 foo12()855026394\n DATABYTES 05\n DATABYTES beb505f5\n DATALABEL external 15 foo15()3199534581\n DATABYTES 05\n DATABYTES fca75fe6\n DATALABEL external 8 foo8()4238827494\n DATABYTES 05\n DATABYTES 1b124338\n DATALABEL external 26 foo26()454181688\n DATABYTES 05""" # noqa: E501 assert expected_asm in output["asm_runtime"] From 7e718d9b3503babb5b8bcdaf84b4f1954c08b290 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 14:29:39 +0300 
Subject: [PATCH 122/172] proper import and move file --- vyper/evm/{ => assembler}/constants.py | 0 vyper/evm/assembler/optimizer.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename vyper/evm/{ => assembler}/constants.py (100%) diff --git a/vyper/evm/constants.py b/vyper/evm/assembler/constants.py similarity index 100% rename from vyper/evm/constants.py rename to vyper/evm/assembler/constants.py diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 40c3383959..4b15535486 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -1,5 +1,5 @@ from vyper.evm.assembler.core import DATA_ITEM, JUMPDEST, PUSHLABEL, PUSHLABELJUMPDEST, Label, is_symbol -from vyper.evm.constants import COMMUTATIVE_OPS +from vyper.evm.assembler.constants import COMMUTATIVE_OPS from vyper.exceptions import CompilerPanic _TERMINAL_OPS = ("JUMP", "RETURN", "REVERT", "STOP", "INVALID") @@ -228,7 +228,7 @@ def _stack_peephole_opts(assembly): ): changed = True del assembly[i : i + 2] - if assembly[i] == "SWAP1" and str(assembly[i + 1]).lower() in COMMUTATIVE_OPS: + if assembly[i] == "SWAP1" and str(assembly[i + 1]).upper() in COMMUTATIVE_OPS: changed = True del assembly[i] if assembly[i] == "DUP1" and assembly[i + 1] == "SWAP1": From bb616e6fc8bafd91705b199312f1361868ec2c33 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 26 Jun 2025 14:39:18 +0300 Subject: [PATCH 123/172] lint --- tests/functional/venom/parser/test_parsing.py | 40 ++++++++------ .../compiler/venom/test_venom_to_assembly.py | 15 +++++- vyper/compiler/output.py | 4 +- vyper/compiler/phases.py | 10 ++-- vyper/evm/assembler/core.py | 52 +++++++++---------- vyper/evm/assembler/optimizer.py | 14 ++++- vyper/venom/__init__.py | 4 +- vyper/venom/basicblock.py | 2 +- vyper/venom/context.py | 1 - vyper/venom/ir_node_to_venom.py | 11 ++-- vyper/venom/parser.py | 46 ++++++++++------ vyper/venom/venom_to_assembly.py | 18 +++++-- 12 files changed, 132 
insertions(+), 85 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index be34b0146d..39f15e99c1 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -1,6 +1,6 @@ from tests.venom_utils import assert_bb_eq, assert_ctx_eq from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRLiteral, IRVariable -from vyper.venom.context import DataItem, DataSection, IRContext +from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.parser import parse_venom @@ -377,7 +377,16 @@ def test_global_vars(): """ ctx = parse_venom(source) - # assert_ctx_eq(ctx, expected_ctx) + print(ctx) + + expected_ctx = IRContext() + expected_ctx.add_function(main_fn := IRFunction(IRLabel("main"))) + main_bb = main_fn.get_basic_block("main") + main_bb.append_instruction("store", IRLiteral(1), ret=IRVariable("1")) + main_bb.append_instruction("store", IRLiteral(2), ret=IRVariable("2")) + main_bb.append_instruction("add", IRVariable("2"), IRVariable("1"), ret=IRVariable("3")) + + assert_ctx_eq(ctx, expected_ctx) def test_labels_with_addresses(): @@ -394,19 +403,20 @@ def test_labels_with_addresses(): } """ ctx = parse_venom(source) - + assert "my_global" in ctx.global_labels assert ctx.global_labels["my_global"] == 0x1000 - + main_fn = ctx.get_function(IRLabel("main")) assert main_fn is not None - + main_bb = main_fn.get_basic_block("main") assert main_bb.label.address == 0x20 - + other_bb = main_fn.get_basic_block("other_block") assert other_bb.label.address is None + def test_labels_with_addresses_used_in_function(): source = """ my_global: 0x1000 @@ -420,16 +430,16 @@ def test_labels_with_addresses_used_in_function(): } """ ctx = parse_venom(source) - + assert "my_global" in ctx.global_labels assert ctx.global_labels["my_global"] == 0x1000 - + main_fn = ctx.get_function(IRLabel("main")) assert main_fn is not None - + 
main_bb = main_fn.get_basic_block("main") assert main_bb.label.address == 0x20 - + other_bb = main_fn.get_basic_block("other_block") assert other_bb.label.address is None @@ -454,17 +464,17 @@ def test_labels_with_tags(): parsed_ctx = parse_venom(source) fn = list(parsed_ctx.functions.values())[0] - + start_bb = fn.get_basic_block("start") assert not start_bb.is_pinned, "start block should not be volatile" - + revert_bb = fn.get_basic_block("revert") assert revert_bb.is_pinned, "revert block should be volatile due to pinned tag" assert revert_bb.label.address == 0x100, "revert block should have address 0x100" - + special_bb = fn.get_basic_block("special") assert special_bb.is_pinned, "special block should be volatile due to pinned tag" - + normal_bb = fn.get_basic_block("normal") assert not normal_bb.is_pinned, "normal block should not be volatile" - assert normal_bb.label.address == 0x200, "normal block should have address 0x200" \ No newline at end of file + assert normal_bb.label.address == 0x200, "normal block should have address 0x200" diff --git a/tests/unit/compiler/venom/test_venom_to_assembly.py b/tests/unit/compiler/venom/test_venom_to_assembly.py index 9b092c45ba..a62b7e3e00 100644 --- a/tests/unit/compiler/venom/test_venom_to_assembly.py +++ b/tests/unit/compiler/venom/test_venom_to_assembly.py @@ -1,4 +1,4 @@ -from vyper.evm.assembler.core import Label, PUSHLABEL +from vyper.evm.assembler.core import PUSHLABEL, Label from vyper.venom.parser import parse_venom from vyper.venom.venom_to_assembly import VenomCompiler @@ -36,6 +36,7 @@ def test_optimistic_swap_params(): asm = VenomCompiler(ctx).generate_evm_assembly() assert asm == ["SWAP2", "PUSH1", 117, "POP", "MSTORE", "MSTORE", "JUMP"] + def test_global_vars(): code = """ global_var: 10 @@ -50,4 +51,14 @@ def test_global_vars(): """ ctx = parse_venom(code) asm = VenomCompiler(ctx).generate_evm_assembly() - assert asm == [Label("global_var"), "PUSH1", 1, "PUSH1", 2, "POP", 
PUSHLABEL(Label("global_var")), "ADD", "JUMP"] \ No newline at end of file + assert asm == [ + Label("global_var"), + "PUSH1", + 1, + "PUSH1", + 2, + "POP", + PUSHLABEL(Label("global_var")), + "ADD", + "JUMP", + ] diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index d36a381392..1e2fabb878 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -10,13 +10,13 @@ from vyper.compiler.phases import CompilerData from vyper.compiler.utils import build_gas_estimates from vyper.evm import opcodes +from vyper.evm.assembler.core import JUMPDEST, Label from vyper.exceptions import VyperException -from vyper.ir import compile_ir from vyper.semantics.types.function import ContractFunctionT, FunctionVisibility, StateMutability from vyper.typing import StorageLayout from vyper.utils import safe_relpath from vyper.warnings import ContractSizeLimit, vyper_warn -from vyper.evm.assembler.core import Label, JUMPDEST + def build_ast_dict(compiler_data: CompilerData) -> dict: ast_dict = { diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index e6d44965e4..d57086dfe4 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -3,8 +3,6 @@ from pathlib import Path, PurePath from typing import Any, Optional -from vyper.venom.ir_node_to_venom import convert_data_segment_to_function, generate_venom_from_ir -from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral import vyper.codegen.core as codegen from vyper import ast as vy_ast from vyper.ast import natspec @@ -27,6 +25,8 @@ from vyper.typing import StorageLayout from vyper.utils import ERC5202_PREFIX, sha256sum from vyper.venom import generate_assembly_experimental, run_passes_on +from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral +from vyper.venom.ir_node_to_venom import convert_data_segment_to_function, generate_venom_from_ir from vyper.warnings import VyperWarning, vyper_warn DEFAULT_CONTRACT_PATH = 
PurePath("VyperContract.vy") @@ -282,9 +282,7 @@ def venom_deploytime(self): "immutables_len": self.compilation_target._metadata["type"].immutable_section_bytes, } - venom_ctx = generate_venom_from_ir( - self.ir_nodes, self.settings, constants=constants - ) + venom_ctx = generate_venom_from_ir(self.ir_nodes, self.settings, constants=constants) main_entry = venom_ctx.entry_function @@ -300,7 +298,7 @@ def venom_deploytime(self): bb.is_pinned = True data_fn.append_basic_block(bb) bb.append_instruction("db", IRHexString(self.bytecode_runtime)) - + bb = IRBasicBlock(IRLabel("cbor_metadata"), data_fn) bb.is_pinned = True data_fn.append_basic_block(bb) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index f84dee5647..560a82068d 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -1,4 +1,3 @@ -from dataclasses import dataclass from typing import Any from vyper.evm.assembler.symbols import CONST, CONSTREF, BaseConstOp, Label, SymbolKey @@ -19,30 +18,15 @@ def num_to_bytearray(x): return o -class Label: - def __init__(self, label: str): - assert isinstance(label, str), f"invalid label {type(label)} {label}" - self.label = label - - def __repr__(self): - return f"LABEL {self.label}" - - def __eq__(self, other): - if not isinstance(other, Label): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - class JUMPDEST: def __init__(self, label: Label): assert isinstance(label, Label), label self.label = label - + def __repr__(self): return f"JUMPDEST {self.label.label}" + class PUSHLABEL: def __init__(self, label: Label): assert isinstance(label, Label), f"invalid label {type(label)} {label}" @@ -59,27 +43,30 @@ def __eq__(self, other): def __hash__(self): return hash(self.label) + class PUSHLABELJUMPDEST: """ This is a special case of PUSHLABEL that is used to push a label - that is used in a jump or return address. This is used to allow + that is used in a jump or return address. 
This is used to allow the optimizer to remove jumpdests that are not used. """ + def __init__(self, label: Label): assert isinstance(label, Label), label self.label = label - + def __repr__(self): return f"PUSHLABELJUMPDEST {self.label.label}" - + def __eq__(self, other): if not isinstance(other, PUSHLABELJUMPDEST): return False return self.label == other.label - + def __hash__(self): return hash(self.label) + # push the result of an addition (which might be resolvable at compile-time) class PUSH_OFST: def __init__(self, label: Label | CONSTREF, ofst: int): @@ -169,7 +156,16 @@ def is_ofst(assembly_item): AssemblyInstruction = ( - str | TaggedInstruction | int | PUSHLABEL | JUMPDEST | PUSH_OFST | DATA_ITEM | CONST + str + | TaggedInstruction + | int + | Label + | PUSHLABEL + | PUSHLABELJUMPDEST + | JUMPDEST + | PUSH_OFST + | DATA_ITEM + | CONST ) @@ -344,18 +340,18 @@ def note_breakpoint(line_number_map, pc, item): def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: segments = [] current_segment_length = 0 - + for item in assembly: if isinstance(item, Label): if current_segment_length > 0: segments.append(current_segment_length) current_segment_length = 0 continue - + if not isinstance(item, DATA_ITEM): # Only DATA_ITEM contributes to segment length continue - + # Add to current segment length if is_symbol(item.data): current_segment_length += SYMBOL_SIZE @@ -363,11 +359,11 @@ def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: current_segment_length += len(item.data) else: # pragma: nocover raise ValueError(f"invalid data {type(item)} {item}") - + # Add the final segment if it has data if current_segment_length > 0: segments.append(current_segment_length) - + return segments diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 4b15535486..d8bfa2b87a 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -1,5 +1,12 @@ -from 
vyper.evm.assembler.core import DATA_ITEM, JUMPDEST, PUSHLABEL, PUSHLABELJUMPDEST, Label, is_symbol from vyper.evm.assembler.constants import COMMUTATIVE_OPS +from vyper.evm.assembler.core import ( + DATA_ITEM, + JUMPDEST, + PUSHLABEL, + PUSHLABELJUMPDEST, + Label, + is_symbol, +) from vyper.exceptions import CompilerPanic _TERMINAL_OPS = ("JUMP", "RETURN", "REVERT", "STOP", "INVALID") @@ -104,7 +111,10 @@ def _merge_jumpdests(assembly): # replace all instances of PUSHLABELJUMPDEST x with PUSHLABELJUMPDEST y new_symbol = assembly[i + 1].label for j in range(len(assembly)): - if isinstance(assembly[j], PUSHLABELJUMPDEST) and assembly[j].label == current_symbol: + if ( + isinstance(assembly[j], PUSHLABELJUMPDEST) + and assembly[j].label == current_symbol + ): assembly[j].label = new_symbol changed = True diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index b03f2d1c26..8b7fb9caf3 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -39,12 +39,14 @@ DEFAULT_OPT_LEVEL = OptimizationLevel.default() + def generate_assembly_experimental( venom_ctx: IRContext, optimize: OptimizationLevel = DEFAULT_OPT_LEVEL ) -> list[AssemblyInstruction]: compiler = VenomCompiler(venom_ctx) return compiler.generate_evm_assembly(optimize == OptimizationLevel.NONE) + def _run_passes(fn: IRFunction, optimize: OptimizationLevel, ac: IRAnalysesCache) -> None: # Run passes on Venom IR # TODO: Add support for optimization levels @@ -124,5 +126,3 @@ def run_passes_on(ctx: IRContext, optimize: OptimizationLevel) -> None: for fn in ctx.functions.values(): _run_passes(fn, optimize, ir_analyses[fn]) - - diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 3c9665cc4e..023c150927 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -195,7 +195,7 @@ def plain_name(self) -> str: class IRHexString(IROperand): """ - IRHexString represents a hex string literal in IR, + IRHexString represents a hex string literal in IR, currently only 
used for db instructions """ diff --git a/vyper/venom/context.py b/vyper/venom/context.py index e859663fd1..5673ab57a4 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -1,4 +1,3 @@ -import textwrap from dataclasses import dataclass, field from typing import Iterator, Optional diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 844022f1fa..b84d6c3fe9 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -12,12 +12,12 @@ from vyper.ir.compile_ir import _runtime_code_offsets from vyper.venom.basicblock import ( IRBasicBlock, + IRHexString, IRInstruction, IRLabel, IRLiteral, IROperand, IRVariable, - IRHexString, ) from vyper.venom.context import DataSection, IRContext from vyper.venom.function import IRFunction, IRParameter @@ -564,7 +564,7 @@ def _convert_ir_bb(fn, ir, symbols): return IRLabel(ir.args[0].value, True) elif ir.value == "data": label = IRLabel(ir.args[0].value, True) - + ctx.append_data_section(label) for c in ir.args[1:]: @@ -575,7 +575,7 @@ def _convert_ir_bb(fn, ir, symbols): data = _convert_ir_bb(fn, c, symbols) assert isinstance(data, IRLabel) # help mypy ctx.append_data_item(data) - + elif ir.value == "label": label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() @@ -779,11 +779,11 @@ def _convert_ir_opcode(fn: IRFunction, ir: IRnode, symbols: SymbolTable) -> None def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSection]) -> None: if len(data_sections) == 0: return - + first_label = data_sections[0].label fn = ctx.create_function(first_label.value) fn.clear_basic_blocks() - + for data_section in data_sections: bb = IRBasicBlock(data_section.label, fn) bb.is_pinned = True @@ -797,4 +797,3 @@ def convert_data_segment_to_function(ctx: IRContext, data_sections: list[DataSec assert isinstance(data_item.data, bytes) hex_string = IRHexString(data_item.data) bb.append_instruction("db", hex_string) - \ No newline at end of file diff 
--git a/vyper/venom/parser.py b/vyper/venom/parser.py index a9c7c88ba2..42d12bf475 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -5,12 +5,12 @@ from vyper.venom.basicblock import ( IRBasicBlock, + IRHexString, IRInstruction, IRLabel, IRLiteral, IROperand, IRVariable, - IRHexString, ) from vyper.venom.context import IRContext from vyper.venom.function import IRFunction @@ -115,7 +115,9 @@ class _GlobalLabel(_TypedItem): class _LabelDecl: """Represents a block declaration in the parse tree.""" - def __init__(self, label: str, address: Optional[int] = None, tags: Optional[list[str]] = None) -> None: + def __init__( + self, label: str, address: Optional[int] = None, tags: Optional[list[str]] = None + ) -> None: self.label = label self.address = address self.tags = tags or [] @@ -124,22 +126,22 @@ def __init__(self, label: str, address: Optional[int] = None, tags: Optional[lis class VenomTransformer(Transformer): def start(self, children) -> IRContext: ctx = IRContext() - + # Separate global labels and functions global_labels = [] funcs = [] - + for child in children: if isinstance(child, _GlobalLabel): global_labels.append(child) else: funcs.append(child) - + # Process global labels for global_label in global_labels: name, address = global_label.children ctx.add_global_label(name, address) - + # Process functions for fn_name, items in funcs: fn = ctx.create_function(fn_name) @@ -163,7 +165,14 @@ def start(self, children) -> IRContext: for item in items: if isinstance(item, _LabelDecl): if current_block_label is not None: - blocks.append((current_block_label, current_block_address, current_block_instructions, current_block_tags)) + blocks.append( + ( + current_block_label, + current_block_address, + current_block_instructions, + current_block_tags, + ) + ) current_block_label = item.label current_block_address = item.address current_block_tags = item.tags @@ -174,7 +183,14 @@ def start(self, children) -> IRContext: 
current_block_instructions.append(item) if current_block_label is not None: - blocks.append((current_block_label, current_block_address, current_block_instructions, current_block_tags)) + blocks.append( + ( + current_block_label, + current_block_address, + current_block_instructions, + current_block_tags, + ) + ) for block_data in blocks: # All blocks now have: (block_name, address, instructions, tags) @@ -183,11 +199,11 @@ def start(self, children) -> IRContext: bb = IRBasicBlock(IRLabel(block_name, True, address), fn) else: bb = IRBasicBlock(IRLabel(block_name, True), fn) - + # Set is_volatile if "pinned" tag is present if "pinned" in tags: bb.is_pinned = True - + fn.append_basic_block(bb) for instruction in instructions: @@ -217,14 +233,14 @@ def label_decl(self, children) -> _LabelDecl: label = _unescape(str(children[0])) address = None tags = [] - + # Process children after the label for child in children[1:]: if isinstance(child, IRLiteral): address = child.value elif isinstance(child, list): # tag_list returns a list tags = child - + return _LabelDecl(label, address, tags) def statement(self, children) -> IRInstruction: @@ -248,14 +264,14 @@ def instruction(self, children) -> IRInstruction: # just the opcode (IDENT) opcode = str(children[0]) # Handle Lark tokens - if hasattr(children[0], 'value'): + if hasattr(children[0], "value"): opcode = children[0].value operands = [] elif len(children) == 2: # Two cases: IDENT + operands_list OR "db" + operands_list opcode = str(children[0]) - # Handle Lark tokens - if hasattr(children[0], 'value'): + # Handle Lark tokens + if hasattr(children[0], "value"): opcode = children[0].value operands = children[1] else: diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index f9bc7ad598..c09496fd9e 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -21,12 +21,12 @@ PSEUDO_INSTRUCTION, TEST_INSTRUCTIONS, IRBasicBlock, + IRHexString, IRInstruction, IRLabel, 
IRLiteral, IROperand, IRVariable, - IRHexString, ) from vyper.venom.context import IRContext, IRFunction from vyper.venom.passes import NormalizationPass @@ -114,6 +114,7 @@ ] ) + def apply_line_numbers(inst: IRInstruction, asm) -> list[str]: ret = [] for op in asm: @@ -167,7 +168,7 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr asm: list[AssemblyInstruction] = [] # Add global variables to the assembly - for var_name, var_value in self.ctx.global_labels.items(): + for var_name, _var_value in self.ctx.global_labels.items(): asm.append(Label(var_name)) for fn in self.ctx.functions.values(): @@ -507,7 +508,7 @@ def _generate_evm_for_instruction( pass elif opcode == "db": # Handle inline db instruction - emit data directly to assembly - data_operand = inst.operands[0] + data_operand = inst.operands[0] if isinstance(data_operand, IRLabel): assembly.append(DATA_ITEM(_as_asm_symbol(data_operand))) elif isinstance(data_operand, IRHexString): @@ -543,7 +544,12 @@ def _generate_evm_for_instruction( ), f"invoke target must be a label (is ${type(target)} ${target})" return_label = self.mklabel("return_label") assembly.extend( - [PUSHLABELJUMPDEST(return_label), PUSHLABELJUMPDEST(_as_asm_symbol(target)), "JUMP", JUMPDEST(return_label)] + [ + PUSHLABELJUMPDEST(return_label), + PUSHLABELJUMPDEST(_as_asm_symbol(target)), + "JUMP", + JUMPDEST(return_label), + ] ) elif opcode == "ret": assembly.append("JUMP") @@ -569,7 +575,9 @@ def _generate_evm_for_instruction( assembly.extend(["ISZERO", PUSHLABELJUMPDEST(Label("revert")), "JUMPI"]) elif opcode == "assert_unreachable": end_symbol = self.mklabel("reachable") - assembly.extend([PUSHLABELJUMPDEST(end_symbol), "JUMPI", "INVALID", JUMPDEST(end_symbol)]) + assembly.extend( + [PUSHLABELJUMPDEST(end_symbol), "JUMPI", "INVALID", JUMPDEST(end_symbol)] + ) elif opcode == "iload": addr = inst.operands[0] mem_deploy_end = self.ctx.constants["mem_deploy_end"] From d346f0bd6181e0f7eecd32d028ed1bef4550ba75 Mon 
Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 18:05:39 +0300 Subject: [PATCH 124/172] update old pipeline --- vyper/codegen/self_call.py | 6 +++++- vyper/ir/compile_ir.py | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/vyper/codegen/self_call.py b/vyper/codegen/self_call.py index ed56109fd4..822f4a9b0c 100644 --- a/vyper/codegen/self_call.py +++ b/vyper/codegen/self_call.py @@ -115,7 +115,11 @@ def ir_for_self_call(stmt_expr, context): if return_buffer is not None: goto_op += [return_buffer] # pass return label to subroutine - goto_op.append(["symbol", return_label]) + return_label_node = IRnode.from_list( + ["symbol", return_label], + passthrough_metadata={"is_jump_dest": True} + ) + goto_op.append(return_label_node) call_sequence = ["seq"] call_sequence.append(eval_once_check(_freshname(stmt_expr.node_source_code))) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 92aa56e1d2..6e9128129b 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -13,10 +13,12 @@ CONSTREF, DATA_ITEM, JUMP, + JUMPDEST, JUMPI, PUSH, PUSH_OFST, PUSHLABEL, + PUSHLABELJUMPDEST, AssemblyInstruction, Label, TaggedInstruction, @@ -686,7 +688,13 @@ def _height_of(varname): if code.value == "symbol": label = code.args[0].value assert isinstance(label, str) - return [PUSHLABEL(Label(label))] + # Check if this symbol is meant to be used as a jump destination + # This includes return addresses for internal function calls + is_jump_dest = code.passthrough_metadata.get("is_jump_dest", False) + if is_jump_dest: + return [PUSHLABELJUMPDEST(Label(label))] + else: + return [PUSHLABEL(Label(label))] # set a symbol as a location. 
if code.value == "label": @@ -723,7 +731,7 @@ def _height_of(varname): self.withargs = old_withargs - return [Label(label_name)] + body_asm + pop_scoped_vars + return [JUMPDEST(Label(label_name))] + body_asm + pop_scoped_vars if code.value == "unique_symbol": symbol = code.args[0].value From a9efe4db4599a3bc4ceec106b481d06d2b79c079 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 18:35:03 +0300 Subject: [PATCH 125/172] more fixes --- vyper/ir/compile_ir.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 6e9128129b..32f8176fd7 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -345,7 +345,7 @@ def _height_of(varname): end_symbol = self.mksymbol("join") o.extend(["ISZERO", *JUMPI(end_symbol)]) o.extend(self._compile_r(code.args[1], height)) - o.extend([end_symbol]) + o.extend([JUMPDEST(end_symbol)]) return o # If statements (3 arguments, ie. if x: y, else: z) @@ -356,9 +356,9 @@ def _height_of(varname): end_symbol = self.mksymbol("join") o.extend(["ISZERO", *JUMPI(mid_symbol)]) o.extend(self._compile_r(code.args[1], height)) - o.extend([*JUMP(end_symbol), mid_symbol]) + o.extend([*JUMP(end_symbol), JUMPDEST(mid_symbol)]) o.extend(self._compile_r(code.args[2], height)) - o.extend([end_symbol]) + o.extend([JUMPDEST(end_symbol)]) return o # repeat(counter_location, start, rounds, rounds_bound, body) @@ -420,7 +420,7 @@ def _height_of(varname): self.withargs[i_name.value] = height + 1 # stack: exit_i, i - o.extend([entry_dest]) + o.extend([JUMPDEST(entry_dest)]) with self.modify_breakdest(exit_dest, continue_dest, height + 2): o.extend(self._compile_r(body, height + 2)) @@ -432,12 +432,12 @@ def _height_of(varname): # stack: exit_i, i # increment i: - o.extend([continue_dest, "PUSH1", 1, "ADD"]) + o.extend([JUMPDEST(continue_dest), "PUSH1", 1, "ADD"]) # stack: exit_i, i+1 (new_i) # if (exit_i != new_i) { goto entry_dest } o.extend(["DUP2", "DUP2", 
"XOR", *JUMPI(entry_dest)]) - o.extend([exit_dest, "POP", "POP"]) + o.extend([JUMPDEST(exit_dest), "POP", "POP"]) return o @@ -569,7 +569,7 @@ def _height_of(varname): if code.value == "assert_unreachable": o = self._compile_r(code.args[0], height) end_symbol = self.mksymbol("reachable") - o.extend([*JUMPI(end_symbol), "INVALID", end_symbol]) + o.extend([*JUMPI(end_symbol), "INVALID", JUMPDEST(end_symbol)]) return o # Assert (if false, exit) @@ -766,7 +766,7 @@ def _create_postambles(self): # common revert block if self.global_revert_label is not None: - ret.extend([self.global_revert_label, *PUSH(0), "DUP1", "REVERT"]) + ret.extend([JUMPDEST(self.global_revert_label), *PUSH(0), "DUP1", "REVERT"]) return ret From f3d2580612d14684a7f1977bb95a255cfb5c0249 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 18:46:46 +0300 Subject: [PATCH 126/172] dead code tests fix --- tests/unit/compiler/asm/test_asm_optimizer.py | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tests/unit/compiler/asm/test_asm_optimizer.py b/tests/unit/compiler/asm/test_asm_optimizer.py index 230d1b30c4..4f6867b181 100644 --- a/tests/unit/compiler/asm/test_asm_optimizer.py +++ b/tests/unit/compiler/asm/test_asm_optimizer.py @@ -3,7 +3,7 @@ from vyper.compiler import compile_code from vyper.compiler.phases import CompilerData from vyper.compiler.settings import OptimizationLevel, Settings -from vyper.evm.assembler.core import PUSHLABEL, Label +from vyper.evm.assembler.core import PUSHLABEL, Label, JUMPDEST from vyper.evm.assembler.optimizer import _merge_jumpdests codes = [ @@ -82,19 +82,30 @@ def __init__(): def test_dead_code_eliminator(code): c = CompilerData(code, settings=Settings(optimize=OptimizationLevel.NONE)) - # get the labels - initcode_labels = [i for i in c.assembly if isinstance(i, Label)] - runtime_labels = [i for i in c.assembly_runtime if isinstance(i, Label)] + # get the labels - including both Label and JUMPDEST objects + 
initcode_labels = [] + for i in c.assembly: + if isinstance(i, Label): + initcode_labels.append(i.label) + elif isinstance(i, JUMPDEST): + initcode_labels.append(i.label.label) + + runtime_labels = [] + for i in c.assembly_runtime: + if isinstance(i, Label): + runtime_labels.append(i.label) + elif isinstance(i, JUMPDEST): + runtime_labels.append(i.label.label) ctor_only = "ctor_only()" runtime_only = "runtime_only()" # qux reachable from unoptimized initcode, foo not reachable. - assert any(ctor_only in label.label for label in initcode_labels) - assert all(runtime_only not in label.label for label in initcode_labels) + assert any(ctor_only in label for label in initcode_labels) + assert all(runtime_only not in label for label in initcode_labels) - assert any(runtime_only in label.label for label in runtime_labels) - assert all(ctor_only not in label.label for label in runtime_labels) + assert any(runtime_only in label for label in runtime_labels) + assert all(ctor_only not in label for label in runtime_labels) def test_library_code_eliminator(make_input_bundle, experimental_codegen): From 25645f8673ad3de5147365b735c9d9915e409e33 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 18:51:27 +0300 Subject: [PATCH 127/172] jump map fix --- vyper/evm/assembler/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 560a82068d..ccaa529153 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -230,7 +230,7 @@ def resolve_symbols( # update pc_jump_map if item == "JUMP": last = assembly[i - 1] - if isinstance(last, PUSHLABEL) and last.label.label.startswith("internal"): + if (isinstance(last, PUSHLABEL) or isinstance(last, PUSHLABELJUMPDEST)) and last.label.label.startswith("internal"): if last.label.label.endswith("cleanup"): # exit an internal function source_map["pc_jump_map"][pc] = "o" From 2111c273e8dd03436cc1d52297fe51dbd14f6a15 Mon Sep 17 
00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 23:00:53 +0300 Subject: [PATCH 128/172] handle circular deps --- vyper/evm/assembler/core.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index ccaa529153..b37e09695f 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -180,7 +180,10 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[Sym if isinstance(item, CONST): _add_to_symbol_map(symbol_map, CONSTREF(item.name), item.value) - while True: + max_iterations = 100 # Prevent infinite loops from circular dependencies + iterations = 0 + + while iterations < max_iterations: changed = False for item in assembly: if isinstance(item, BaseConstOp): @@ -195,6 +198,17 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[Sym if not changed: break + + iterations += 1 + + # Check if we hit the iteration limit (circular dependency) + if iterations >= max_iterations: + unresolved = [] + for item in assembly: + if isinstance(item, BaseConstOp) and CONSTREF(item.name) not in symbol_map: + unresolved.append(item.name) + if unresolved: + raise CompilerPanic(f"Circular dependency detected in constants: {unresolved}") def resolve_symbols( From 9011f7e35d20ab931699ead66d5747cdc63a6ce5 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 23:01:38 +0300 Subject: [PATCH 129/172] validate jumps --- vyper/evm/assembler/core.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index b37e09695f..b08728503e 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -404,6 +404,28 @@ def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes: return bytes(ret) +def _validate_assembly_jumps(assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int]): + """ + Validate 
assembly jumpdest and jump references for correctness before generating bytecode + """ + # Track all jump destinations and references + jump_dests = set() + jump_refs = set() + + for item in assembly: + if isinstance(item, JUMPDEST): + jump_dests.add(item.label) + elif isinstance(item, PUSHLABELJUMPDEST): + jump_refs.add(item.label) + + # Check all jump references have destinations + missing_dests = jump_refs - jump_dests + if missing_dests: + missing_labels = [label.label if hasattr(label, 'label') else str(label) + for label in missing_dests] + raise CompilerPanic(f"Jump references without destinations: {missing_labels}") + + def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[str, Any]]: """ Generate bytecode and source map from assembly @@ -414,6 +436,7 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st """ # This API might seem a bit strange, but it's backwards compatible symbol_map, source_map = resolve_symbols(assembly) + _validate_assembly_jumps(assembly, symbol_map) bytecode = _assembly_to_evm(assembly, symbol_map) return bytecode, source_map From 28a38fa8f311d8b5af0b3086d0e278e12f7f9ad8 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 23:04:33 +0300 Subject: [PATCH 130/172] safety --- vyper/evm/assembler/core.py | 2 ++ vyper/evm/assembler/optimizer.py | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index b08728503e..b6aa94ccfd 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -385,6 +385,8 @@ def _compile_data_item(item: DATA_ITEM, symbol_map: dict[SymbolKey, int]) -> byt if isinstance(item.data, bytes): return item.data if isinstance(item.data, Label): + if item.data not in symbol_map: + raise CompilerPanic(f"Unresolved label in data section: {item.data}") symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") return symbolbytes diff --git 
a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index d8bfa2b87a..8532d214a9 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -88,20 +88,37 @@ def _merge_jumpdests(assembly): # intermediate jumps. # (Usually a chain of JUMPs is created by a nested block, # or some nested if statements.) + + # First, identify labels that are used as data references + data_labels = set() + for item in assembly: + if isinstance(item, DATA_ITEM) and isinstance(item.data, Label): + data_labels.add(item.data) + elif isinstance(item, PUSHLABEL): + # PUSHLABEL is used for data references + data_labels.add(item.label) + changed = False i = 0 while i < len(assembly) - 2: # if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": if is_symbol(assembly[i]): current_symbol = assembly[i] + + # Skip merging if current symbol is used as data + if current_symbol in data_labels: + i += 1 + continue + if is_symbol(assembly[i + 1]): # LABEL x LABEL y - # replace all instances of PUSHLABEL x with PUSHLABEL y + # Only merge jump destinations, not data references new_symbol = assembly[i + 1] if new_symbol != current_symbol: for j in range(len(assembly)): + # Only update PUSHLABELJUMPDEST references if ( - isinstance(assembly[j], (PUSHLABEL, PUSHLABELJUMPDEST)) + isinstance(assembly[j], PUSHLABELJUMPDEST) and assembly[j].label == current_symbol ): assembly[j].label = new_symbol From a3ac4125ffca8e279524f816c37366ef0d9ebc4b Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 2 Jul 2025 23:10:20 +0300 Subject: [PATCH 131/172] lint --- tests/unit/compiler/asm/test_asm_optimizer.py | 4 ++-- vyper/codegen/self_call.py | 3 +-- vyper/evm/assembler/core.py | 19 +++++++++++-------- vyper/evm/assembler/optimizer.py | 8 ++++---- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/tests/unit/compiler/asm/test_asm_optimizer.py b/tests/unit/compiler/asm/test_asm_optimizer.py index 4f6867b181..69fd13d846 100644 --- 
a/tests/unit/compiler/asm/test_asm_optimizer.py +++ b/tests/unit/compiler/asm/test_asm_optimizer.py @@ -3,7 +3,7 @@ from vyper.compiler import compile_code from vyper.compiler.phases import CompilerData from vyper.compiler.settings import OptimizationLevel, Settings -from vyper.evm.assembler.core import PUSHLABEL, Label, JUMPDEST +from vyper.evm.assembler.core import JUMPDEST, PUSHLABEL, Label from vyper.evm.assembler.optimizer import _merge_jumpdests codes = [ @@ -89,7 +89,7 @@ def test_dead_code_eliminator(code): initcode_labels.append(i.label) elif isinstance(i, JUMPDEST): initcode_labels.append(i.label.label) - + runtime_labels = [] for i in c.assembly_runtime: if isinstance(i, Label): diff --git a/vyper/codegen/self_call.py b/vyper/codegen/self_call.py index 822f4a9b0c..e5064c5e9c 100644 --- a/vyper/codegen/self_call.py +++ b/vyper/codegen/self_call.py @@ -116,8 +116,7 @@ def ir_for_self_call(stmt_expr, context): goto_op += [return_buffer] # pass return label to subroutine return_label_node = IRnode.from_list( - ["symbol", return_label], - passthrough_metadata={"is_jump_dest": True} + ["symbol", return_label], passthrough_metadata={"is_jump_dest": True} ) goto_op.append(return_label_node) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index b6aa94ccfd..3f283066f6 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -182,7 +182,7 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[Sym max_iterations = 100 # Prevent infinite loops from circular dependencies iterations = 0 - + while iterations < max_iterations: changed = False for item in assembly: @@ -198,9 +198,9 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[Sym if not changed: break - + iterations += 1 - + # Check if we hit the iteration limit (circular dependency) if iterations >= max_iterations: unresolved = [] @@ -244,7 +244,9 @@ def resolve_symbols( # update pc_jump_map if item == "JUMP": last = 
assembly[i - 1] - if (isinstance(last, PUSHLABEL) or isinstance(last, PUSHLABELJUMPDEST)) and last.label.label.startswith("internal"): + if ( + isinstance(last, PUSHLABEL) or isinstance(last, PUSHLABELJUMPDEST) + ) and last.label.label.startswith("internal"): if last.label.label.endswith("cleanup"): # exit an internal function source_map["pc_jump_map"][pc] = "o" @@ -413,18 +415,19 @@ def _validate_assembly_jumps(assembly: list[AssemblyInstruction], symbol_map: di # Track all jump destinations and references jump_dests = set() jump_refs = set() - + for item in assembly: if isinstance(item, JUMPDEST): jump_dests.add(item.label) elif isinstance(item, PUSHLABELJUMPDEST): jump_refs.add(item.label) - + # Check all jump references have destinations missing_dests = jump_refs - jump_dests if missing_dests: - missing_labels = [label.label if hasattr(label, 'label') else str(label) - for label in missing_dests] + missing_labels = [ + label.label if hasattr(label, "label") else str(label) for label in missing_dests + ] raise CompilerPanic(f"Jump references without destinations: {missing_labels}") diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 8532d214a9..4752552cea 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -88,7 +88,7 @@ def _merge_jumpdests(assembly): # intermediate jumps. # (Usually a chain of JUMPs is created by a nested block, # or some nested if statements.) 
- + # First, identify labels that are used as data references data_labels = set() for item in assembly: @@ -97,19 +97,19 @@ def _merge_jumpdests(assembly): elif isinstance(item, PUSHLABEL): # PUSHLABEL is used for data references data_labels.add(item.label) - + changed = False i = 0 while i < len(assembly) - 2: # if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": if is_symbol(assembly[i]): current_symbol = assembly[i] - + # Skip merging if current symbol is used as data if current_symbol in data_labels: i += 1 continue - + if is_symbol(assembly[i + 1]): # LABEL x LABEL y # Only merge jump destinations, not data references From 36d7f07e9dcdc01460edf676dfea265a31048a41 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 3 Jul 2025 16:57:36 +0300 Subject: [PATCH 132/172] [squash] const expr wip fix Add --asm option to output assembly code without bytecode compilation use original names when posible lint Refactor label handling in tests and const evaluation; remove address assertions for labels in functions and update const evaluation to handle IRLiteral directly. 
fix --- tests/functional/venom/parser/test_parsing.py | 16 +- tests/unit/venom/test_const_expressions.py | 475 ++++++++++++++++++ vyper/cli/venom_main.py | 12 +- vyper/venom/const_eval.py | 170 +++++++ vyper/venom/context.py | 6 +- vyper/venom/parser.py | 164 +++++- vyper/venom/venom_to_assembly.py | 26 +- 7 files changed, 831 insertions(+), 38 deletions(-) create mode 100644 tests/unit/venom/test_const_expressions.py create mode 100644 vyper/venom/const_eval.py diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index 39f15e99c1..22bddabe55 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -394,7 +394,7 @@ def test_labels_with_addresses(): my_global: 0x1000 function main { - main: @0x20 + main: %1 = 1 jmp @other_block other_block: @@ -410,8 +410,9 @@ def test_labels_with_addresses(): main_fn = ctx.get_function(IRLabel("main")) assert main_fn is not None + # Labels inside functions don't have addresses main_bb = main_fn.get_basic_block("main") - assert main_bb.label.address == 0x20 + assert main_bb.label.address is None other_bb = main_fn.get_basic_block("other_block") assert other_bb.label.address is None @@ -422,7 +423,7 @@ def test_labels_with_addresses_used_in_function(): my_global: 0x1000 function main { - main: @0x20 + main: %1 = 1 jmp @other_block other_block: @@ -437,9 +438,6 @@ def test_labels_with_addresses_used_in_function(): main_fn = ctx.get_function(IRLabel("main")) assert main_fn is not None - main_bb = main_fn.get_basic_block("main") - assert main_bb.label.address == 0x20 - other_bb = main_fn.get_basic_block("other_block") assert other_bb.label.address is None @@ -453,11 +451,11 @@ def test_labels_with_tags(): function main { start: nop - revert: @0x100 [pinned] + revert: [pinned] revert 0, 0 special: [tag1, pinned, tag2] nop - normal: @0x200 + normal: stop } """ @@ -470,11 +468,9 @@ def test_labels_with_tags(): revert_bb = 
fn.get_basic_block("revert") assert revert_bb.is_pinned, "revert block should be volatile due to pinned tag" - assert revert_bb.label.address == 0x100, "revert block should have address 0x100" special_bb = fn.get_basic_block("special") assert special_bb.is_pinned, "special block should be volatile due to pinned tag" normal_bb = fn.get_basic_block("normal") assert not normal_bb.is_pinned, "normal block should not be volatile" - assert normal_bb.label.address == 0x200, "normal block should have address 0x200" diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py new file mode 100644 index 0000000000..c016eaa104 --- /dev/null +++ b/tests/unit/venom/test_const_expressions.py @@ -0,0 +1,475 @@ +import pytest + +from vyper.evm.assembler.core import assembly_to_evm +from vyper.evm.assembler.symbols import CONST, Label +from vyper.venom.basicblock import IRLabel, IRLiteral +from vyper.venom.const_eval import ConstEvalException, evaluate_const_expr, try_evaluate_const_expr +from vyper.venom.parser import parse_venom +from vyper.venom.venom_to_assembly import VenomCompiler + + +def test_basic_const_eval(): + constants = {"A": 10, "B": 20} + global_labels = {"label1": 0x100, "label2": 0x200} + + # Test literals + assert evaluate_const_expr(42, constants, global_labels) == 42 + + # Test constant references + assert evaluate_const_expr("$A", constants, global_labels) == 10 + assert evaluate_const_expr("$B", constants, global_labels) == 20 + + # Test label references + assert evaluate_const_expr("@label1", constants, global_labels) == 0x100 + assert evaluate_const_expr("@label2", constants, global_labels) == 0x200 + + # Test operations + assert evaluate_const_expr(("add", 10, 20), constants, global_labels) == 30 + assert evaluate_const_expr(("sub", 50, 20), constants, global_labels) == 30 + assert evaluate_const_expr(("mul", 5, 6), constants, global_labels) == 30 + assert evaluate_const_expr(("div", 60, 2), constants, global_labels) 
== 30 + assert evaluate_const_expr(("mod", 32, 5), constants, global_labels) == 2 + assert evaluate_const_expr(("max", 10, 20), constants, global_labels) == 20 + assert evaluate_const_expr(("min", 10, 20), constants, global_labels) == 10 + + # Test operations with references + assert evaluate_const_expr(("add", "$A", "$B"), constants, global_labels) == 30 + assert evaluate_const_expr(("add", "@label1", 0x100), constants, global_labels) == 0x200 + + # Test nested operations + assert evaluate_const_expr(("add", ("mul", 2, 3), 4), constants, global_labels) == 10 + assert evaluate_const_expr(("mul", ("add", "$A", 5), 2), constants, global_labels) == 30 + + +def test_const_eval_errors(): + constants = {"A": 10} + global_labels = {"label1": 0x100} + + # Test undefined constant + with pytest.raises(ConstEvalException, match="Undefined constant: B"): + evaluate_const_expr("$B", constants, global_labels) + + # Test undefined label + with pytest.raises(ConstEvalException, match="Undefined global label: label2"): + evaluate_const_expr("@label2", constants, global_labels) + + # Test division by zero + with pytest.raises(ConstEvalException, match="Division by zero"): + evaluate_const_expr(("div", 10, 0), constants, global_labels) + + # Test modulo by zero + with pytest.raises(ConstEvalException, match="Modulo by zero"): + evaluate_const_expr(("mod", 10, 0), constants, global_labels) + + # Test unknown operation + with pytest.raises(ConstEvalException, match="Unknown operation: unknown_op"): + evaluate_const_expr(("unknown_op", 10, 20), constants, global_labels) + + +def test_venom_const_definitions(): + code = """ + const SLOT_SIZE = 32 + const BASE_ADDR = 0x1000 + const SLOT_2 = mul($SLOT_SIZE, 2) + const DATA_START = add($BASE_ADDR, $SLOT_2) + + data_label: $DATA_START + + function main { + entry: + ret + } + """ + + ctx = parse_venom(code) + + # Check constants + assert ctx.constants["SLOT_SIZE"] == 32 + assert ctx.constants["BASE_ADDR"] == 0x1000 + assert 
ctx.constants["SLOT_2"] == 64 + assert ctx.constants["DATA_START"] == 0x1040 + + # Check global label + assert ctx.global_labels["data_label"] == 0x1040 + + +def test_venom_label_addresses(): + """Test that label addresses can be used in const expressions.""" + code = """ + const BASE = 0x100 + const OFFSET = 0x20 + + data_label: 0x1000 + computed_label: add(@data_label, $OFFSET) + + function main { + entry: + %1 = @data_label + %2 = @computed_label + return %1, %2 + } + """ + + ctx = parse_venom(code) + + # Check global labels + assert ctx.global_labels["data_label"] == 0x1000 + assert ctx.global_labels["computed_label"] == 0x1020 + + # Check that labels are used in instructions + fn = ctx.entry_function + bb = fn.get_basic_block("entry") + instructions = bb.instructions + + # Labels in instructions should be IRLabel objects + assert instructions[0].opcode == "store" + assert isinstance(instructions[0].operands[0], IRLabel) + assert instructions[0].operands[0].value == "data_label" + + assert instructions[1].opcode == "store" + assert isinstance(instructions[1].operands[0], IRLabel) + assert instructions[1].operands[0].value == "computed_label" + + +def test_venom_instruction_operands(): + code = """ + const SLOT_SIZE = 32 + const NUM_SLOTS = 4 + + data_label: 0x2000 + + function main { + entry: + %1 = $SLOT_SIZE + %2 = mul($SLOT_SIZE, $NUM_SLOTS) + %3 = add(@data_label, 16) + %4 = max($SLOT_SIZE, 64) + return %3, %4 + } + """ + + ctx = parse_venom(code) + fn = ctx.entry_function + bb = fn.get_basic_block("entry") + + instructions = bb.instructions + + # Check store instructions have evaluated operands + assert instructions[0].opcode == "store" + assert instructions[0].operands[0].value == 32 + + assert instructions[1].opcode == "store" + assert instructions[1].operands[0].value == 128 + + assert instructions[2].opcode == "store" + assert instructions[2].operands[0].value == 0x2010 + + assert instructions[3].opcode == "store" + assert 
instructions[3].operands[0].value == 64 + + +def test_venom_complex_example(): + code = """ + const WORD_SIZE = 32 + const HEADER_SIZE = 64 + const ARRAY_SLOT = 5 + const ARRAY_OFFSET = mul($ARRAY_SLOT, $WORD_SIZE) + const DATA_START = add($HEADER_SIZE, $ARRAY_OFFSET) + + array_data: $DATA_START + array_end: add(@array_data, mul($WORD_SIZE, 10)) + + function process_array { + loop_start: + %ptr = mload 0 + %val = mload %ptr + %next_ptr = add %ptr, $WORD_SIZE + mstore 0, %next_ptr + %done = ge %next_ptr, @array_end + %should_continue = iszero %done + jnz %should_continue, @loop_start, @finish + + finish: + return %ptr, %val + } + """ + + ctx = parse_venom(code) + + # Check computed constants + assert ctx.constants["ARRAY_OFFSET"] == 160 + assert ctx.constants["DATA_START"] == 224 + + # Check global labels + assert ctx.global_labels["array_data"] == 224 + assert ctx.global_labels["array_end"] == 224 + 320 # 544 + + # Check instruction operands + fn = ctx.get_function(ctx.functions[list(ctx.functions.keys())[0]].name) + bb = fn.get_basic_block("loop_start") + + # Find the add instruction + add_inst = None + for inst in bb.instructions: + if inst.opcode == "add": + add_inst = inst + break + + assert add_inst is not None + assert add_inst.operands[0].value == 32 + + +def test_try_evaluate_undefined_const(): + """Test that try_evaluate returns labels for undefined constants.""" + constants = {"A": 10} + global_labels = {"label1": 0x100} + unresolved_consts = {} + const_refs = set() + + # Test defined constant - returns value + result = try_evaluate_const_expr("$A", constants, global_labels, unresolved_consts, const_refs) + assert result == 10 + assert len(unresolved_consts) == 0 + assert len(const_refs) == 0 + + # Test undefined constant - returns label + result = try_evaluate_const_expr("$B", constants, global_labels, unresolved_consts, const_refs) + assert isinstance(result, str) + assert result == "B" # Now uses the constant name directly + assert "B" in const_refs + 
assert result in unresolved_consts + assert unresolved_consts[result] == ("ref", "B") + + +def test_try_evaluate_undefined_in_operation(): + """Test operations with undefined constants.""" + constants = {"A": 10} + global_labels = {} + unresolved_consts = {} + const_refs = set() + + # Operation with one undefined constant + result = try_evaluate_const_expr( + ("add", "$A", "$B"), constants, global_labels, unresolved_consts, const_refs + ) + assert isinstance(result, str) + assert result.startswith("__const_") # Complex expressions still get generated names + assert "B" in const_refs + + # Check that the unresolved expression is stored correctly + assert result in unresolved_consts + op_name, arg1, arg2 = unresolved_consts[result] + assert op_name == "add" + assert arg1 == 10 # A was resolved + assert isinstance(arg2, str) and arg2 == "B" # B is unresolved + + # Operation with both undefined + unresolved_consts.clear() + const_refs.clear() + result = try_evaluate_const_expr( + ("mul", "$B", "$C"), constants, global_labels, unresolved_consts, const_refs + ) + assert isinstance(result, str) + assert result.startswith("__const_") + assert "B" in const_refs + assert "C" in const_refs + + +def test_venom_with_undefined_constants(): + """Test parsing Venom code with undefined constants in instructions.""" + code = """ + const A = 100 + + function main { + entry: + %1 = $A + %2 = add $A, $UNDEFINED + %3 = mul $UNDEFINED2, 10 + ret + } + """ + + ctx = parse_venom(code) + + # Check that defined constant is resolved + assert ctx.constants["A"] == 100 + + # Check that undefined references are tracked + assert len(ctx.const_refs) >= 2 or len(ctx.unresolved_consts) >= 2 + + # Check instructions + fn = ctx.entry_function + bb = fn.get_basic_block("entry") + instructions = bb.instructions + + # First instruction should have resolved value + assert instructions[0].opcode == "store" + assert isinstance(instructions[0].operands[0], IRLiteral) + assert 
instructions[0].operands[0].value == 100 + + # Second instruction should be add with label for unresolved expression + assert instructions[1].opcode == "add" + # At least one operand should be a label + has_label = any(isinstance(op, IRLabel) for op in instructions[1].operands) + assert has_label + + # Third instruction should be mul with label for unresolved expression + assert instructions[2].opcode == "mul" + has_label = any(isinstance(op, IRLabel) for op in instructions[2].operands) + assert has_label + + +def test_venom_undefined_in_instruction_operands(): + """Test undefined constants used directly in instruction operands.""" + code = """ + const SIZE = 32 + + function test { + entry: + %1 = add $SIZE, $UNDEFINED_OFFSET + %2 = mul $UNDEFINED_FACTOR, 10 + mstore $UNDEFINED_ADDR, %1 + ret + } + """ + + ctx = parse_venom(code) + + # Check that undefined constants are tracked + assert len(ctx.const_refs) > 0 + assert "UNDEFINED_OFFSET" in ctx.const_refs or len(ctx.unresolved_consts) > 0 + + fn = ctx.entry_function + bb = fn.get_basic_block("entry") + + # Check add instruction - should use labels for unresolved expressions + add_inst = next(inst for inst in bb.instructions if inst.opcode == "add") + # At least one operand should be a label (for the unresolved expression) + has_label = any(isinstance(op, IRLabel) for op in add_inst.operands) + assert has_label + + # Check mul instruction + mul_inst = next(inst for inst in bb.instructions if inst.opcode == "mul") + has_label = any(isinstance(op, IRLabel) for op in mul_inst.operands) + assert has_label + + +def test_complex_undefined_chain(): + """Test complex chains of undefined constants.""" + code = """ + const BASE = 100 + const STEP = 10 + + function compute { + entry: + %1 = add $BASE, $UNDEFINED_A + %2 = mul %1, $STEP + %3 = add %2, $UNDEFINED_B + %4 = max %3, $BASE + ret %4 + } + """ + + ctx = parse_venom(code) + + # Should track multiple undefined constants + assert len(ctx.const_refs) >= 2 or 
len(ctx.unresolved_consts) >= 2 + + # The computation chain should work even with undefined constants + fn = ctx.entry_function + bb = fn.get_basic_block("entry") + assert len(bb.instructions) >= 5 # 4 computations + ret + + +def test_undefined_const_end_to_end(): + """Test end-to-end compilation with undefined constants that get resolved in assembly.""" + code = """ + const DEFINED_A = 100 + + function main { + entry: + %1 = $DEFINED_A + %2 = $UNDEFINED_X + %3 = 0 + %4 = 32 + mstore %1, %3 + mstore %2, %4 + stop + } + """ + + ctx = parse_venom(code) + + assert len(ctx.const_refs) >= 1 + assert "UNDEFINED_X" in ctx.const_refs + + # Generate assembly + compiler = VenomCompiler(ctx) + asm = compiler.generate_evm_assembly(no_optimize=True) + + assert len(ctx.unresolved_consts) >= 1 + + # Now add the missing constant definitions to the assembly + # This simulates the "linking" step where external constants are provided + # Since we use the actual constant names, we can just add them directly + asm.insert(0, CONST("UNDEFINED_X", 50)) + + bytecode, _ = assembly_to_evm(asm) + + assert len(bytecode) > 0 + + +def test_undefined_const_with_operations(): + code = """ + const BASE = 1000 + + function compute { + entry: + %1 = add $BASE, $EXTERNAL_OFFSET + %2 = sub %1, $EXTERNAL_FEE + %3 = add %2, $EXTERNAL_BONUS + ret %3 + } + """ + + ctx = parse_venom(code) + + # Generate assembly + compiler = VenomCompiler(ctx) + asm = compiler.generate_evm_assembly(no_optimize=True) + + # Add the external constant definitions directly by name + asm.insert(0, CONST("EXTERNAL_OFFSET", 500)) + asm.insert(0, CONST("EXTERNAL_FEE", 100)) + asm.insert(0, CONST("EXTERNAL_BONUS", 50)) + + bytecode, _ = assembly_to_evm(asm) + assert len(bytecode) > 0 + + +def test_undefined_const_linking_example(): + """Example showing how external constants can be linked in a clean way.""" + # Venom code using external constants and labels + code = """ + const SLOT_SIZE = 32 + + function storage_access { + entry: + 
%slot = add $STORAGE_BASE, $SLOT_OFFSET + %addr = mul %slot, $SLOT_SIZE + %val = sload @deploy_addr + return %val + } + """ + + ctx = parse_venom(code) + compiler = VenomCompiler(ctx) + asm = compiler.generate_evm_assembly(no_optimize=True) + + asm.insert(0, CONST("STORAGE_BASE", 0x1000)) + asm.insert(0, CONST("SLOT_OFFSET", 5)) + asm.insert(0, Label("deploy_addr")) + + # Compile to bytecode + bytecode, _ = assembly_to_evm(asm) + assert len(bytecode) > 0 diff --git a/vyper/cli/venom_main.py b/vyper/cli/venom_main.py index 0ceeae73f6..0e1c849e56 100755 --- a/vyper/cli/venom_main.py +++ b/vyper/cli/venom_main.py @@ -35,6 +35,9 @@ def _parse_args(argv: list[str]): parser.add_argument( "--stdin", action="store_true", help="whether to pull venom input from stdin" ) + parser.add_argument( + "--asm", action="store_true", help="output assembly without compiling to bytecode" + ) args = parser.parse_args(argv) @@ -61,8 +64,13 @@ def _parse_args(argv: list[str]): run_passes_on(ctx, OptimizationLevel.default()) asm = generate_assembly_experimental(ctx) - bytecode, _ = generate_bytecode(asm) - print(f"0x{bytecode.hex()}") + + if args.asm: + for inst in asm: + print(inst) + else: + bytecode, _ = generate_bytecode(asm) + print(f"0x{bytecode.hex()}") if __name__ == "__main__": diff --git a/vyper/venom/const_eval.py b/vyper/venom/const_eval.py new file mode 100644 index 0000000000..fd8301b1f9 --- /dev/null +++ b/vyper/venom/const_eval.py @@ -0,0 +1,170 @@ +""" +Constant expression evaluator for Venom IR. 
+ +Supports simple expressions with function-style notation: +- Literals: 123, 0x100 +- Constant references: $CONST_NAME +- Label references: @label_name +- Operations: add(a, b), sub(a, b), mul(a, b), div(a, b), mod(a, b), max(a, b), min(a, b) +""" +from typing import Any, Union +from vyper.venom.basicblock import IRLiteral +from vyper.exceptions import CompilerPanic + + +class ConstEvalException(CompilerPanic): + pass + + +_const_label_counter = 0 + + +def generate_const_label_name() -> str: + """Generate a unique label name for an unresolved constant.""" + global _const_label_counter + label = f"__const_{_const_label_counter}" + _const_label_counter += 1 + return label + + +def evaluate_const_expr(expr: Any, constants: dict[str, int], global_labels: dict[str, int]) -> int: + # Handle simple cases first + if isinstance(expr, int): + return expr + + if isinstance(expr, IRLiteral): + return expr.value + + if isinstance(expr, str): + # Check if it's a constant reference ($NAME) + if expr.startswith("$"): + const_name = expr[1:] + if const_name not in constants: + raise ConstEvalException(f"Undefined constant: {const_name}") + return constants[const_name] + + # Check if it's a label reference (@NAME) + if expr.startswith("@"): + label_name = expr[1:] + if label_name not in global_labels: + raise ConstEvalException(f"Undefined global label: {label_name}") + return global_labels[label_name] + + # Otherwise it might be a plain identifier (shouldn't happen in well-formed expressions) + raise ConstEvalException(f"Invalid constant expression: {expr}") + + # Handle function-style operations + if isinstance(expr, tuple) and len(expr) == 3: + op_name, arg1, arg2 = expr + + # Recursively evaluate arguments + val1 = evaluate_const_expr(arg1, constants, global_labels) + val2 = evaluate_const_expr(arg2, constants, global_labels) + + # Perform operation + if op_name == "add": + return val1 + val2 + elif op_name == "sub": + return val1 - val2 + elif op_name == "mul": + return val1 
* val2 + elif op_name == "div": + if val2 == 0: + raise ConstEvalException("Division by zero in const expression") + return val1 // val2 # Integer division + elif op_name == "mod": + if val2 == 0: + raise ConstEvalException("Modulo by zero in const expression") + return val1 % val2 + elif op_name == "max": + return max(val1, val2) + elif op_name == "min": + return min(val1, val2) + else: + raise ConstEvalException(f"Unknown operation: {op_name}") + + raise ConstEvalException(f"Invalid constant expression format: {expr}") + + +def try_evaluate_const_expr( + expr: Any, + constants: dict[str, int], + global_labels: dict[str, int], + unresolved_consts: dict[str, Any], + const_refs: set[str], +) -> Union[int, str]: + # Import here to avoid circular imports + from vyper.venom.basicblock import IRLiteral + + # Handle simple cases first + if isinstance(expr, int): + return expr + + if isinstance(expr, IRLiteral): + return expr.value + + if isinstance(expr, str): + # Check if it's a constant reference ($NAME) + if expr.startswith("$"): + const_name = expr[1:] + if const_name not in constants: + # Use the constant name directly as the label for simple references + const_refs.add(const_name) + if const_name not in unresolved_consts: + unresolved_consts[const_name] = ("ref", const_name) + return const_name + return constants[const_name] + + # Check if it's a label reference (@NAME) + if expr.startswith("@"): + label_name = expr[1:] + if label_name not in global_labels: + # For undefined labels, we still throw since labels should be defined + raise ConstEvalException(f"Undefined global label: {label_name}") + return global_labels[label_name] + + # Otherwise it might be a plain identifier + raise ConstEvalException(f"Invalid constant expression: {expr}") + + # Handle operations + if isinstance(expr, tuple) and len(expr) == 3: + op_name, arg1, arg2 = expr + + # Recursively evaluate arguments + val1 = try_evaluate_const_expr( + arg1, constants, global_labels, unresolved_consts, 
const_refs + ) + val2 = try_evaluate_const_expr( + arg2, constants, global_labels, unresolved_consts, const_refs + ) + + # If both values are integers, we can compute the result + if isinstance(val1, int) and isinstance(val2, int): + # Perform operation + if op_name == "add": + return val1 + val2 + elif op_name == "sub": + return val1 - val2 + elif op_name == "mul": + return val1 * val2 + elif op_name == "div": + if val2 == 0: + raise ConstEvalException("Division by zero in const expression") + return val1 // val2 + elif op_name == "mod": + if val2 == 0: + raise ConstEvalException("Modulo by zero in const expression") + return val1 % val2 + elif op_name == "max": + return max(val1, val2) + elif op_name == "min": + return min(val1, val2) + else: + raise ConstEvalException(f"Unknown operation: {op_name}") + + # Otherwise, create a label for this unresolved expression + label = generate_const_label_name() + unresolved_consts[label] = (op_name, val1, val2) + return label + + raise ConstEvalException(f"Invalid constant expression format: {expr}") diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 5673ab57a4..f0d0743295 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Iterator, Optional +from typing import Any, Iterator, Optional from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRVariable from vyper.venom.function import IRFunction @@ -37,6 +37,8 @@ class IRContext: data_segment: list[DataSection] last_label: int last_variable: int + unresolved_consts: dict[str, Any] # Maps temp label to const expression + const_refs: set[str] # Tracks undefined constant references def __init__(self) -> None: self.functions = {} @@ -44,6 +46,8 @@ def __init__(self) -> None: self.data_segment = [] self.constants = {} self.global_labels = {} + self.unresolved_consts = {} + self.const_refs = set() self.last_label = 0 self.last_variable = 0 diff --git a/vyper/venom/parser.py 
b/vyper/venom/parser.py index 42d12bf475..9546e1456d 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -12,6 +12,7 @@ IROperand, IRVariable, ) +from vyper.venom.const_eval import evaluate_const_expr, try_evaluate_const_expr from vyper.venom.context import IRContext from vyper.venom.function import IRFunction @@ -28,16 +29,19 @@ # Allow multiple comment styles COMMENT: ";" /[^\\n]*/ | "//" /[^\\n]*/ | "#" /[^\\n]*/ - start: (global_label | function)* + start: (const_def | global_label | function)* + + # Constant definitions + const_def: "const" IDENT "=" const_expr NEWLINE+ # Global label definitions with optional address override - global_label: label_name ":" CONST + global_label: label_name ":" const_expr NEWLINE+ function: "function" func_name "{" block_content "}" block_content: (label_decl | statement)* - label_decl: (IDENT | ESCAPED_STRING) ":" ("@" CONST)? ("[" tag_list "]")? NEWLINE+ + label_decl: (IDENT | ESCAPED_STRING) ":" ("[" tag_list "]")? NEWLINE+ tag_list: tag ("," tag)* tag: IDENT @@ -51,7 +55,7 @@ operands_list: operand ("," operand)* - operand: VAR_IDENT | CONST | label_ref | HEXSTR + operand: VAR_IDENT | const_expr | HEXSTR VAR_IDENT: "%" (DIGIT|LETTER|"_"|":")+ @@ -66,6 +70,12 @@ HEXSTR: "x" DOUBLE_QUOTE (HEXDIGIT|"_")+ DOUBLE_QUOTE CONST: SIGNED_INT | "0x" HEXDIGIT+ + # Constant expressions + const_expr: const_atom | const_func + const_func: IDENT "(" const_expr ("," const_expr)* ")" + const_atom: CONST | const_ref | label_ref + const_ref: "$" IDENT + %ignore WS %ignore COMMENT """ @@ -112,6 +122,10 @@ class _GlobalLabel(_TypedItem): pass +class _ConstDef(_TypedItem): + pass + + class _LabelDecl: """Represents a block declaration in the parse tree.""" @@ -127,19 +141,29 @@ class VenomTransformer(Transformer): def start(self, children) -> IRContext: ctx = IRContext() - # Separate global labels and functions + # Separate const defs, global labels and functions + const_defs = [] global_labels = [] funcs = [] for child in children: 
- if isinstance(child, _GlobalLabel): + if isinstance(child, _ConstDef): + const_defs.append(child) + elif isinstance(child, _GlobalLabel): global_labels.append(child) else: funcs.append(child) + # Process const definitions first + for const_def in const_defs: + name, expr = const_def.children + value = self._evaluate_const_expr(expr, ctx.constants, ctx.global_labels) + ctx.add_constant(name, value) + # Process global labels for global_label in global_labels: - name, address = global_label.children + name, expr = global_label.children + address = self._evaluate_const_expr(expr, ctx.constants, ctx.global_labels) ctx.add_global_label(name, address) # Process functions @@ -174,7 +198,7 @@ def start(self, children) -> IRContext: ) ) current_block_label = item.label - current_block_address = item.address + current_block_address = item.address # Will always be None now current_block_tags = item.tags current_block_instructions = [] elif isinstance(item, IRInstruction): @@ -194,11 +218,8 @@ def start(self, children) -> IRContext: for block_data in blocks: # All blocks now have: (block_name, address, instructions, tags) - block_name, address, instructions, tags = block_data - if address is not None: - bb = IRBasicBlock(IRLabel(block_name, True, address), fn) - else: - bb = IRBasicBlock(IRLabel(block_name, True), fn) + block_name, _address, instructions, tags = block_data + bb = IRBasicBlock(IRLabel(block_name, True), fn) # Set is_volatile if "pinned" tag is present if "pinned" in tags: @@ -208,7 +229,28 @@ def start(self, children) -> IRContext: for instruction in instructions: assert isinstance(instruction, IRInstruction) # help mypy - bb.insert_instruction(instruction) + # Process instruction operands to evaluate const expressions + processed_operands = [] + for op in instruction.operands: + if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): + # This is a const expression - evaluate it + if isinstance(op, str) and op.startswith("@"): + # This is a label 
reference that came from const_atom + # Convert it back to IRLabel + label_name = op[1:] + processed_operands.append(IRLabel(label_name, True)) + else: + # Use try_evaluate to handle undefined constants + processed_operands.append(self._try_evaluate_const_expr(op, ctx)) + else: + processed_operands.append(op) + # Create new instruction with evaluated operands + new_inst = IRInstruction( + instruction.opcode, processed_operands, output=instruction.output + ) + new_inst.ast_source = instruction.ast_source + new_inst.error_msg = instruction.error_msg + bb.insert_instruction(new_inst) _set_last_var(fn) @@ -216,9 +258,34 @@ def start(self, children) -> IRContext: return ctx + def _evaluate_const_expr( + self, expr, constants: dict[str, int], global_labels: dict[str, int] + ) -> int: + """Helper method to evaluate const expressions.""" + return evaluate_const_expr(expr, constants, global_labels) + + def _try_evaluate_const_expr(self, expr, ctx: IRContext) -> IROperand: + """Try to evaluate const expression, returning IRLabel for unresolved parts.""" + result = try_evaluate_const_expr( + expr, ctx.constants, ctx.global_labels, ctx.unresolved_consts, ctx.const_refs + ) + if isinstance(result, int): + return IRLiteral(result) + else: + # result is a label name for unresolved constant + return IRLabel(result, True) + + def const_def(self, children) -> _ConstDef: + # Filter out NEWLINE tokens + filtered = [c for c in children if not (hasattr(c, "type") and c.type == "NEWLINE")] + name, expr = filtered + return _ConstDef([str(name), expr]) + def global_label(self, children) -> _GlobalLabel: - name, address_literal = children - return _GlobalLabel([name, address_literal.value]) + # Filter out NEWLINE tokens + filtered = [c for c in children if not (hasattr(c, "type") and c.type == "NEWLINE")] + name, expr = filtered + return _GlobalLabel([name, expr]) def function(self, children) -> tuple[str, list]: name, block_content = children @@ -229,19 +296,19 @@ def block_content(self, 
children) -> list: return children def label_decl(self, children) -> _LabelDecl: - # children[0] is the label, optional address, optional tags, then NEWLINE tokens + # children[0] is the label, optional tags, then NEWLINE tokens label = _unescape(str(children[0])) - address = None tags = [] # Process children after the label for child in children[1:]: - if isinstance(child, IRLiteral): - address = child.value + # Skip NEWLINE tokens + if hasattr(child, "type") and child.type == "NEWLINE": + continue elif isinstance(child, list): # tag_list returns a list tags = child - return _LabelDecl(label, address, tags) + return _LabelDecl(label, None, tags) def statement(self, children) -> IRInstruction: # children[0] is the instruction/assignment, rest are NEWLINE tokens @@ -254,6 +321,13 @@ def assignment(self, children) -> IRInstruction: return value if isinstance(value, (IRLiteral, IRVariable, IRLabel)): return IRInstruction("store", [value], output=to) + # Handle const expressions that need evaluation + if isinstance(value, (str, tuple)): + # This will be evaluated later in the function processing + return IRInstruction("store", [value], output=to) # type: ignore[list-item] + # Handle raw integers from const_atom + if isinstance(value, int): + return IRInstruction("store", [IRLiteral(value)], output=to) raise TypeError(f"Unexpected value {value} of type {type(value)}") def expr(self, children) -> IRInstruction | IROperand: @@ -277,16 +351,27 @@ def instruction(self, children) -> IRInstruction: else: raise ValueError(f"Unexpected instruction children: {children}") + # Process operands - evaluate const expressions if needed + processed_operands = [] + for op in operands: + if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): + # This is a const expression that needs evaluation + # We need access to context, so we'll store it as-is for now + # and process it later during function processing + processed_operands.append(op) + else: + 
processed_operands.append(op) + # reverse operands, venom internally represents top of stack # as rightmost operand if opcode == "invoke": # reverse stack arguments but not label arg # invoke - operands = [operands[0]] + list(reversed(operands[1:])) + processed_operands = [processed_operands[0]] + list(reversed(processed_operands[1:])) # special cases: operands with labels look better un-reversed elif opcode not in ("jmp", "jnz", "djmp", "phi", "db"): - operands.reverse() - return IRInstruction(opcode, operands) + processed_operands.reverse() + return IRInstruction(opcode, processed_operands) # type: ignore[arg-type] def operands_list(self, children) -> list[IROperand]: return children @@ -339,6 +424,37 @@ def tag_list(self, children) -> list[str]: def tag(self, children) -> str: return str(children[0]) + def const_expr(self, children): + # const_expr: const_atom | const_func + return children[0] + + def const_atom(self, children): + # const_atom: CONST | const_ref | label_ref + child = children[0] + if isinstance(child, IRLiteral): + return child + elif isinstance(child, IRLabel): + # Return as a label reference to be evaluated later + return f"@{child.value}" + else: + # Must be a const_ref (string starting with $) + return child + + def const_ref(self, children) -> str: + # const_ref: "$" IDENT + return f"${children[0]}" + + def const_func(self, children): + # const_func: IDENT "(" const_expr ("," const_expr)* ")" + op_name = str(children[0]) + args = children[1:] + + if len(args) != 2: + raise ValueError(f"Operation {op_name} requires exactly 2 arguments, got {len(args)}") + + # Return a tuple representing the operation + return (op_name, args[0], args[1]) + def parse_venom(source: str) -> IRContext: tree = VENOM_PARSER.parse(source) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 14fc04305a..51a05d56e0 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -14,6 +14,7 @@ TaggedInstruction, 
) from vyper.evm.assembler.optimizer import optimize_assembly +from vyper.evm.assembler.symbols import CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.utils import MemoryPositions, OrderedSet, wrap256 from vyper.venom.analysis import CFGAnalysis, DFGAnalysis, IRAnalysesCache, LivenessAnalysis @@ -170,6 +171,24 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr for var_name, _var_value in self.ctx.global_labels.items(): asm.append(Label(var_name)) + # Emit unresolved constants + for label_name, expr in self.ctx.unresolved_consts.items(): + if isinstance(expr, tuple) and len(expr) > 0 and expr[0] == "ref": + # Simple reference to undefined constant - don't emit anything + # The assembler will handle the undefined reference error + pass + elif isinstance(expr, tuple) and len(expr) == 3: + # Binary operation + op_name, arg1, arg2 = expr + # Emit the appropriate CONST_* operation + if op_name == "add": + asm.append(CONST_ADD(label_name, arg1, arg2)) # type: ignore[arg-type] + elif op_name == "sub": + asm.append(CONST_SUB(label_name, arg1, arg2)) # type: ignore[arg-type] + elif op_name == "max": + asm.append(CONST_MAX(label_name, arg1, arg2)) # type: ignore[arg-type] + # TODO: Add other operations as needed + for fn in self.ctx.functions.values(): ac = IRAnalysesCache(fn) @@ -248,7 +267,12 @@ def _emit_input_operands( # invoke emits the actual instruction itself so we don't need # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": - assembly.append(PUSHLABEL(_as_asm_symbol(op))) + # Check if this label is an unresolved constant + if op.value in self.ctx.unresolved_consts: + # Use PUSH_OFST with CONSTREF for unresolved constants + assembly.append(PUSH_OFST(CONSTREF(op.value), 0)) + else: + assembly.append(PUSHLABEL(_as_asm_symbol(op))) stack.push(op) continue From 3e8c090dda5a81e66abb169e0ab499b50325e636 Mon Sep 17 00:00:00 2001 From: Harry 
Kalogirou Date: Thu, 3 Jul 2025 19:35:02 +0300 Subject: [PATCH 133/172] lint --- vyper/venom/const_eval.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/venom/const_eval.py b/vyper/venom/const_eval.py index fd8301b1f9..d8e7a1cadf 100644 --- a/vyper/venom/const_eval.py +++ b/vyper/venom/const_eval.py @@ -8,8 +8,9 @@ - Operations: add(a, b), sub(a, b), mul(a, b), div(a, b), mod(a, b), max(a, b), min(a, b) """ from typing import Any, Union -from vyper.venom.basicblock import IRLiteral + from vyper.exceptions import CompilerPanic +from vyper.venom.basicblock import IRLiteral class ConstEvalException(CompilerPanic): From bc8982a6d579fec140018215ddc84c31fc86a1c7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 3 Jul 2025 20:04:53 +0300 Subject: [PATCH 134/172] wip --- tests/unit/compiler/asm/test_assembler.py | 15 ++++++++++-- tests/unit/venom/test_const_expressions.py | 27 +++++++++++++++++++++- vyper/evm/assembler/core.py | 3 +++ vyper/evm/assembler/symbols.py | 5 ++++ vyper/venom/const_eval.py | 7 ++++-- 5 files changed, 52 insertions(+), 5 deletions(-) diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index e3fbead1cc..68a4335fd5 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -1,5 +1,5 @@ -from vyper.evm.assembler.core import _resolve_constants -from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF +from vyper.evm.assembler.core import _resolve_constants, assembly_to_evm +from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF, Label def test_const_add(): @@ -24,3 +24,14 @@ def test_const_max(): _resolve_constants(asm, symbol_map) assert symbol_map[CONSTREF("c")] == 2 assert symbol_map[CONSTREF("d")] == 10 + + +def test_const_add_with_label(): + asm = [CONST("a", 5), CONST_ADD("b", "a", "my_label"), CONST_ADD("c", "my_label", 10), Label("my_label")] + # 
symbol_map = {Label("my_label"): 15} # Pre-populate the label with a value + symbol_map = {} + _resolve_constants(asm, symbol_map) + + # Compile to bytecode + bytecode, _ = assembly_to_evm(asm) + assert len(bytecode) > 0 \ No newline at end of file diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index c016eaa104..93d62fd46e 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -447,7 +447,7 @@ def test_undefined_const_with_operations(): assert len(bytecode) > 0 -def test_undefined_const_linking_example(): +def test_undefined_const_label_linking_example(): """Example showing how external constants can be linked in a clean way.""" # Venom code using external constants and labels code = """ @@ -473,3 +473,28 @@ def test_undefined_const_linking_example(): # Compile to bytecode bytecode, _ = assembly_to_evm(asm) assert len(bytecode) > 0 + +def test_undefined_const_label_expression_linking_example(): + code = """ + const SLOT_SIZE = 32 + + function storage_access { + entry: + %slot = add $STORAGE_BASE, $SLOT_OFFSET + %addr = mul %slot, $SLOT_SIZE + %val = sload add(@deploy_addr, $SLOT_SIZE) + return %val + } + """ + + ctx = parse_venom(code) + compiler = VenomCompiler(ctx) + asm = compiler.generate_evm_assembly(no_optimize=True) + + asm.insert(0, CONST("STORAGE_BASE", 0x1000)) + asm.insert(0, CONST("SLOT_OFFSET", 5)) + asm.insert(0, Label("deploy_addr")) + + # Compile to bytecode + bytecode, _ = assembly_to_evm(asm) + assert len(bytecode) > 0 \ No newline at end of file diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 3f283066f6..0248908f64 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -264,6 +264,9 @@ def resolve_symbols( if isinstance(item, CONST): continue # CONST declarations do not go into bytecode + + if isinstance(item, BaseConstOp): + continue # CONST operations do not go into bytecode # update pc 
if isinstance(item, JUMPDEST): diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py index 26f0e1226c..fb79ef0714 100644 --- a/vyper/evm/assembler/symbols.py +++ b/vyper/evm/assembler/symbols.py @@ -67,9 +67,14 @@ def __eq__(self, other): def _resolve_operand(self, operand: str | int, symbol_map: dict[SymbolKey, int]) -> int | None: if isinstance(operand, str): + # Try as CONSTREF first op_ref = CONSTREF(operand) if op_ref in symbol_map: return symbol_map[op_ref] + # Try as Label + label = Label(operand) + if label in symbol_map: + return symbol_map[label] elif isinstance(operand, int): return operand return None diff --git a/vyper/venom/const_eval.py b/vyper/venom/const_eval.py index d8e7a1cadf..7b4e318eea 100644 --- a/vyper/venom/const_eval.py +++ b/vyper/venom/const_eval.py @@ -120,8 +120,11 @@ def try_evaluate_const_expr( if expr.startswith("@"): label_name = expr[1:] if label_name not in global_labels: - # For undefined labels, we still throw since labels should be defined - raise ConstEvalException(f"Undefined global label: {label_name}") + # Treat undefined labels like undefined constants + const_refs.add(label_name) + if label_name not in unresolved_consts: + unresolved_consts[label_name] = ("ref", label_name) + return label_name return global_labels[label_name] # Otherwise it might be a plain identifier From 2463369cc8e4e5dbd8568408aac27a37d646a54f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 3 Jul 2025 20:40:41 +0300 Subject: [PATCH 135/172] wip --- tests/unit/compiler/asm/test_assembler.py | 81 ++++++++-- vyper/evm/assembler/core.py | 171 ++++++++++++++++++++-- 2 files changed, 233 insertions(+), 19 deletions(-) diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index 68a4335fd5..39271342c4 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -1,11 +1,21 @@ -from vyper.evm.assembler.core import _resolve_constants, 
assembly_to_evm +import pytest + +from vyper.evm.assembler.core import ( + PUSH_OFST, + _assembly_to_evm, + _resolve_constants, + assembly_to_evm, + resolve_symbols, +) from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF, Label +from vyper.exceptions import CompilerPanic def test_const_add(): asm = [CONST("a", 1), CONST("b", 2), CONST_ADD("c", "a", "b"), CONST_ADD("d", "c", 10)] symbol_map = {} - _resolve_constants(asm, symbol_map) + label_dependent_consts = _resolve_constants(asm, symbol_map) + assert len(label_dependent_consts) == 0 assert symbol_map[CONSTREF("c")] == 3 assert symbol_map[CONSTREF("d")] == 13 @@ -13,7 +23,9 @@ def test_const_add(): def test_const_sub(): asm = [CONST("a", 1), CONST("b", 2), CONST_SUB("c", "a", "b"), CONST_ADD("d", "c", 10)] symbol_map = {} - _resolve_constants(asm, symbol_map) + label_dependent_consts = _resolve_constants(asm, symbol_map) + + assert len(label_dependent_consts) == 0 assert symbol_map[CONSTREF("c")] == -1 assert symbol_map[CONSTREF("d")] == 9 @@ -21,17 +33,68 @@ def test_const_sub(): def test_const_max(): asm = [CONST("a", 1), CONST("b", 2), CONST_MAX("c", "a", "b"), CONST_MAX("d", "c", 10)] symbol_map = {} - _resolve_constants(asm, symbol_map) + label_dependent_consts = _resolve_constants(asm, symbol_map) + assert len(label_dependent_consts) == 0 assert symbol_map[CONSTREF("c")] == 2 assert symbol_map[CONSTREF("d")] == 10 def test_const_add_with_label(): - asm = [CONST("a", 5), CONST_ADD("b", "a", "my_label"), CONST_ADD("c", "my_label", 10), Label("my_label")] - # symbol_map = {Label("my_label"): 15} # Pre-populate the label with a value - symbol_map = {} - _resolve_constants(asm, symbol_map) + asm = [ + CONST("a", 5), + Label("my_label"), + "JUMPDEST", # This will create actual bytecode at my_label + CONST_ADD("b", "a", "my_label"), # a + my_label position + CONST_ADD("c", "my_label", 100), # my_label position + 100 + PUSH_OFST(CONSTREF("b"), 0), # Push the computed constant 
value + PUSH_OFST(CONSTREF("c"), 0), # Push the computed constant value + ] + + # Resolve symbols and constants + symbol_map, _ = resolve_symbols(asm) + + # Check that constants were calculated correctly + assert symbol_map[CONSTREF("a")] == 5 + assert symbol_map[Label("my_label")] == 0 # First position after constants + assert symbol_map[CONSTREF("b")] == 5 # a + my_label = 5 + 0 + assert symbol_map[CONSTREF("c")] == 100 # my_label + 100 = 0 + 100 # Compile to bytecode bytecode, _ = assembly_to_evm(asm) - assert len(bytecode) > 0 \ No newline at end of file + assert len(bytecode) > 0 + + + +def test_const_add_with_label_overflow(): + # Create assembly with a large offset that will overflow when added to a label + asm = [ + Label("start"), + "JUMPDEST", + CONST("huge_offset", 65000), + CONST_ADD("overflow", "start", 1000), # This is OK (0 + 1000) + CONST_ADD("will_overflow", "overflow", "huge_offset"), # 1000 + 65000 = 66000 > 65535 + PUSH_OFST(CONSTREF("will_overflow"), 0), + ] + + with pytest.raises(CompilerPanic) as exc_info: + resolve_symbols(asm) + + assert "exceeds 16-bit limit" in str(exc_info.value) + assert "will_overflow" in str(exc_info.value) + + +def test_push_ofst_with_label_dependent_const_overflow(): + # Test PUSH_OFST overflow checking during bytecode generation + + # Manually create a symbol map that would cause overflow + symbol_map = {Label("far_label"): 10000, CONSTREF("near_limit"): 65000} # Just under the limit + + # This should fail when PUSH_OFST tries to add an offset that causes overflow + test_asm = [PUSH_OFST(CONSTREF("near_limit"), 1000)] # 65000 + 1000 > 65535 + + label_dependent_consts = {"near_limit"} # Mark as label-dependent + + with pytest.raises(CompilerPanic) as exc_info: + _assembly_to_evm(test_asm, symbol_map, label_dependent_consts) + + assert "exceeds 16-bit limit" in str(exc_info.value) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 0248908f64..6a0cb4a450 100644 --- 
a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -175,11 +175,44 @@ def _add_to_symbol_map(symbol_map: dict[SymbolKey, int], item: SymbolKey, value: symbol_map[item] = value -def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int]): +def _resolve_constants( + assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int] +) -> set[str]: + """ + Resolve constant values and track which constants depend on labels. + + Returns: + Set of constant names that depend on labels (directly or indirectly) + """ + label_dependent_consts: set[str] = set() + + # First, add simple CONST declarations for item in assembly: if isinstance(item, CONST): _add_to_symbol_map(symbol_map, CONSTREF(item.name), item.value) + # Track which constants reference labels (we'll check this later after labels are positioned) + # For now, just identify constants that have string operands that might be labels + # Collect all constant names first (including those from CONST declarations) + all_const_names = set() + for item in assembly: + if isinstance(item, CONST): + all_const_names.add(item.name) + elif isinstance(item, BaseConstOp): + all_const_names.add(item.name) + + potential_label_refs = set() + for item in assembly: + if isinstance(item, BaseConstOp): + # Check if any operand is a string that could be a label + for operand in [item.op1, item.op2]: + if isinstance(operand, str): + # Check if it's not a known constant name + if operand not in all_const_names: + # This could be a label reference + label_dependent_consts.add(item.name) + potential_label_refs.add(operand) + max_iterations = 100 # Prevent infinite loops from circular dependencies iterations = 0 @@ -191,6 +224,20 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[Sym if CONSTREF(item.name) in symbol_map: continue + # Skip if this is a label-dependent constant + if item.name in label_dependent_consts: + continue + + # Check if this constant 
depends on other label-dependent constants + for operand in [item.op1, item.op2]: + if isinstance(operand, str) and operand in label_dependent_consts: + label_dependent_consts.add(item.name) + continue # Skip this constant too + + # Skip if we already know it's label-dependent + if item.name in label_dependent_consts: + continue + # Calculate the value if possible if (value := item.calculate(symbol_map)) is not None: _add_to_symbol_map(symbol_map, CONSTREF(item.name), value) @@ -206,10 +253,62 @@ def _resolve_constants(assembly: list[AssemblyInstruction], symbol_map: dict[Sym unresolved = [] for item in assembly: if isinstance(item, BaseConstOp) and CONSTREF(item.name) not in symbol_map: - unresolved.append(item.name) + # Only report non-label-dependent constants as unresolved here + if item.name not in label_dependent_consts: + unresolved.append(item.name) if unresolved: raise CompilerPanic(f"Circular dependency detected in constants: {unresolved}") + return label_dependent_consts + + +def _resolve_label_dependent_constants( + assembly: list[AssemblyInstruction], + symbol_map: dict[SymbolKey, int], + label_dependent_consts: set[str], +): + """ + Resolve constants that depend on labels, now that labels have been positioned. + Validates that values fit within 16-bit PUSH2 limit. 
+ """ + max_push2_value = 0xFFFF # 65535 - maximum value for PUSH2 + + # Try to resolve remaining constants + max_iterations = 100 + iterations = 0 + + while iterations < max_iterations: + changed = False + for item in assembly: + if isinstance(item, BaseConstOp): + const_ref = CONSTREF(item.name) + # Skip if already resolved + if const_ref in symbol_map: + continue + + # Try to calculate the value + if (value := item.calculate(symbol_map)) is not None: + # Check overflow for label-dependent constants + if item.name in label_dependent_consts and value > max_push2_value: + raise CompilerPanic( + f"Label-dependent constant '{item.name}' has value {value} (constants involving labels must fit in PUSH2 instructions) " + ) + _add_to_symbol_map(symbol_map, const_ref, value) + changed = True + + if not changed: + break + + iterations += 1 + + # Check for unresolved constants + unresolved = [] + for item in assembly: + if isinstance(item, BaseConstOp) and CONSTREF(item.name) not in symbol_map: + unresolved.append(item.name) + if unresolved: + raise CompilerPanic(f"Could not resolve label-dependent constants: {unresolved}") + def resolve_symbols( assembly: list[AssemblyInstruction], @@ -233,7 +332,9 @@ def resolve_symbols( pc: int = 0 - _resolve_constants(assembly, symbol_map) + # First pass: resolve constants that don't depend on labels + # and identify which constants depend on labels + label_dependent_consts = _resolve_constants(assembly, symbol_map) # resolve labels (i.e. JUMPDEST locations) to actual code locations, # and simultaneously build the source map. 
@@ -264,7 +365,7 @@ def resolve_symbols( if isinstance(item, CONST): continue # CONST declarations do not go into bytecode - + if isinstance(item, BaseConstOp): continue # CONST operations do not go into bytecode @@ -286,9 +387,16 @@ def resolve_symbols( if isinstance(item.label, Label): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits elif isinstance(item.label, CONSTREF): - const = symbol_map[item.label] - val = const + item.ofst - pc += calc_push_size(val) + # Check if this constant depends on labels + const_name = item.label.label + if const_name in label_dependent_consts: + # Use fixed PUSH2 size for label-dependent constants + pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits + else: + # For non-label-dependent constants, calculate actual size + const = symbol_map[item.label] + val = const + item.ofst + pc += calc_push_size(val) else: # pragma: nocover raise CompilerPanic(f"invalid ofst {item.label}") @@ -311,6 +419,9 @@ def resolve_symbols( # magic -- probably the assembler should actually add this label _add_to_symbol_map(symbol_map, Label("code_end"), pc) + # Second pass: now that labels are positioned, resolve label-dependent constants + _resolve_label_dependent_constants(assembly, symbol_map, label_dependent_consts) + return symbol_map, source_map @@ -445,12 +556,35 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st # This API might seem a bit strange, but it's backwards compatible symbol_map, source_map = resolve_symbols(assembly) _validate_assembly_jumps(assembly, symbol_map) - bytecode = _assembly_to_evm(assembly, symbol_map) + + # Extract label-dependent constants from the assembly for bytecode generation + label_dependent_consts = set() + for item in assembly: + if isinstance(item, BaseConstOp): + # Check if this constant references labels + for operand in [item.op1, item.op2]: + if isinstance(operand, str) and Label(operand) in symbol_map: + label_dependent_consts.add(item.name) + + # Propagate label dependency + changed 
= True + while changed: + changed = False + for item in assembly: + if isinstance(item, BaseConstOp) and item.name not in label_dependent_consts: + for operand in [item.op1, item.op2]: + if isinstance(operand, str) and operand in label_dependent_consts: + label_dependent_consts.add(item.name) + changed = True + + bytecode = _assembly_to_evm(assembly, symbol_map, label_dependent_consts) return bytecode, source_map def _assembly_to_evm( - assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int] + assembly: list[AssemblyInstruction], + symbol_map: dict[SymbolKey, int], + label_dependent_consts: set[str], ) -> bytes: """ Assembles assembly into EVM bytecode @@ -458,6 +592,7 @@ def _assembly_to_evm( Parameters: assembly: list of asm instructions symbol_map: dict from labels to resolved locations in the code + label_dependent_consts: set of constant names that depend on labels Returns: bytes representing the bytecode """ @@ -470,6 +605,8 @@ def _assembly_to_evm( continue # skippable opcodes elif isinstance(item, CONST): continue # CONST things do not show up in bytecode + elif isinstance(item, BaseConstOp): + continue # CONST operations do not show up in bytecode elif isinstance(item, Label): continue # Label does not show up in bytecode @@ -493,7 +630,21 @@ def _assembly_to_evm( else: assert isinstance(item.label, CONSTREF) ofst = symbol_map[item.label] + item.ofst - bytecode = _compile_push_instruction(PUSH(ofst)) + + # Check if this is a label-dependent constant + const_name = item.label.label + if const_name in label_dependent_consts: + # Use PUSH2 for label-dependent constants + # Also validate the value fits in 16 bits + if ofst > 0xFFFF: + raise CompilerPanic( + f"PUSH_OFST with label-dependent constant '{const_name}' " + f"has value {ofst} which exceeds 16-bit limit" + ) + bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) + else: + # Use optimal size for non-label-dependent constants + bytecode = _compile_push_instruction(PUSH(ofst)) 
ret.extend(bytecode) From a6a8aebc5cf4bd319600cf1cba2df7a4185d1c6a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 3 Jul 2025 20:55:17 +0300 Subject: [PATCH 136/172] tests --- tests/unit/venom/test_const_expressions.py | 56 +++++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index 93d62fd46e..ce87a09eb4 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -1,7 +1,7 @@ import pytest from vyper.evm.assembler.core import assembly_to_evm -from vyper.evm.assembler.symbols import CONST, Label +from vyper.evm.assembler.symbols import CONST, CONST_ADD, Label from vyper.venom.basicblock import IRLabel, IRLiteral from vyper.venom.const_eval import ConstEvalException, evaluate_const_expr, try_evaluate_const_expr from vyper.venom.parser import parse_venom @@ -474,6 +474,7 @@ def test_undefined_const_label_linking_example(): bytecode, _ = assembly_to_evm(asm) assert len(bytecode) > 0 + def test_undefined_const_label_expression_linking_example(): code = """ const SLOT_SIZE = 32 @@ -483,7 +484,7 @@ def test_undefined_const_label_expression_linking_example(): %slot = add $STORAGE_BASE, $SLOT_OFFSET %addr = mul %slot, $SLOT_SIZE %val = sload add(@deploy_addr, $SLOT_SIZE) - return %val + ret %val } """ @@ -495,6 +496,57 @@ def test_undefined_const_label_expression_linking_example(): asm.insert(0, CONST("SLOT_OFFSET", 5)) asm.insert(0, Label("deploy_addr")) + # Compile to bytecode + bytecode, _ = assembly_to_evm(asm) + assert len(bytecode) > 0 + + +def test_label_dependent_const_example(): + # Test demonstrating label-dependent constants working correctly + code = """ + const OFFSET = 100 + + function example { + entry: + %base = @deploy_label + %addr = add %base, $OFFSET + ret %addr + } + """ + + ctx = parse_venom(code) + compiler = VenomCompiler(ctx) + asm = 
compiler.generate_evm_assembly(no_optimize=True) + + # Add a label for testing + asm.insert(0, Label("deploy_label")) + asm.insert(0, "JUMPDEST") + + # Test with CONST_ADD using label + asm.insert(0, CONST_ADD("computed_addr", "deploy_label", 50)) + + # Compile successfully + bytecode, _ = assembly_to_evm(asm) + assert len(bytecode) > 0 + +def test_undefined_const_label_expression_linking_example_2(): + code = """ + const SLOT_SIZE = 32 + + function storage_access { + entry: + %val = sload add(@deploy_addr, $SLOT_SIZE) + ret %val + deploy_addr: [pinned] + stop + } + """ + + ctx = parse_venom(code) + + compiler = VenomCompiler(ctx) + asm = compiler.generate_evm_assembly(no_optimize=True) + # Compile to bytecode bytecode, _ = assembly_to_evm(asm) assert len(bytecode) > 0 \ No newline at end of file From dd42b752899a597b2a3be1a0b77d871ee70c27f0 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 3 Jul 2025 21:05:36 +0300 Subject: [PATCH 137/172] fix no pinned required --- tests/unit/venom/test_const_expressions.py | 2 +- vyper/venom/venom_to_assembly.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index ce87a09eb4..d1af150554 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -537,7 +537,7 @@ def test_undefined_const_label_expression_linking_example_2(): entry: %val = sload add(@deploy_addr, $SLOT_SIZE) ret %val - deploy_addr: [pinned] + deploy_addr: stop } """ diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 51a05d56e0..670a998a9c 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -189,6 +189,15 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr asm.append(CONST_MAX(label_name, arg1, arg2)) # type: ignore[arg-type] # TODO: Add other operations as needed + # Auto-detect labels used 
in const expressions and mark their blocks for emission + for fn in self.ctx.functions.values(): + for bb in fn.get_basic_blocks(): + for label_name, expr in self.ctx.unresolved_consts.items(): + if isinstance(expr, tuple) and len(expr) == 3: + _, arg1, arg2 = expr + if arg1 == bb.label.value or arg2 == bb.label.value: + bb.is_pinned = True + for fn in self.ctx.functions.values(): ac = IRAnalysesCache(fn) From ae17c97590d99c4e17426d0e76e11a5135715d7d Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 3 Jul 2025 21:08:51 +0300 Subject: [PATCH 138/172] test --- tests/unit/compiler/asm/test_assembler.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index 39271342c4..cc0c3eb95d 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -79,22 +79,5 @@ def test_const_add_with_label_overflow(): with pytest.raises(CompilerPanic) as exc_info: resolve_symbols(asm) - assert "exceeds 16-bit limit" in str(exc_info.value) assert "will_overflow" in str(exc_info.value) - -def test_push_ofst_with_label_dependent_const_overflow(): - # Test PUSH_OFST overflow checking during bytecode generation - - # Manually create a symbol map that would cause overflow - symbol_map = {Label("far_label"): 10000, CONSTREF("near_limit"): 65000} # Just under the limit - - # This should fail when PUSH_OFST tries to add an offset that causes overflow - test_asm = [PUSH_OFST(CONSTREF("near_limit"), 1000)] # 65000 + 1000 > 65535 - - label_dependent_consts = {"near_limit"} # Mark as label-dependent - - with pytest.raises(CompilerPanic) as exc_info: - _assembly_to_evm(test_asm, symbol_map, label_dependent_consts) - - assert "exceeds 16-bit limit" in str(exc_info.value) From ab3d7dcdd189db56941c946af4891b447a98e848 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 3 Jul 2025 23:16:26 +0300 Subject: [PATCH 139/172] wip --- 
vyper/venom/memory_location.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vyper/venom/memory_location.py b/vyper/venom/memory_location.py index ec2a2f9da8..8d36dbe969 100644 --- a/vyper/venom/memory_location.py +++ b/vyper/venom/memory_location.py @@ -5,7 +5,7 @@ from vyper.evm.address_space import MEMORY, STORAGE, TRANSIENT, AddrSpace from vyper.exceptions import CompilerPanic -from vyper.venom.basicblock import IRLiteral, IROperand, IRVariable +from vyper.venom.basicblock import IRLabel, IRLiteral, IROperand, IRVariable @dataclass(frozen=True) @@ -51,6 +51,8 @@ def from_operands( _size = size.value elif isinstance(size, IRVariable): _size = None + elif isinstance(size, IRLabel): + _size = None elif isinstance(size, int): _size = size else: # pragma: nocover From 461b700ee16efea56abcb70acff67612d70b9f17 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 4 Jul 2025 17:53:48 +0300 Subject: [PATCH 140/172] parser evaluator remove --- vyper/venom/parser.py | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 9546e1456d..ec4f1b462c 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -5,6 +5,7 @@ from vyper.venom.basicblock import ( IRBasicBlock, + IRConstExpr, IRHexString, IRInstruction, IRLabel, @@ -154,17 +155,15 @@ def start(self, children) -> IRContext: else: funcs.append(child) - # Process const definitions first + # Process const definitions - just store the raw expressions for const_def in const_defs: name, expr = const_def.children - value = self._evaluate_const_expr(expr, ctx.constants, ctx.global_labels) - ctx.add_constant(name, value) + ctx.add_const_expression(name, expr) # Process global labels for global_label in global_labels: name, expr = global_label.children - address = self._evaluate_const_expr(expr, ctx.constants, ctx.global_labels) - ctx.add_global_label(name, address) + ctx.add_global_label(name, 0) # 
Process functions for fn_name, items in funcs: @@ -229,28 +228,7 @@ def start(self, children) -> IRContext: for instruction in instructions: assert isinstance(instruction, IRInstruction) # help mypy - # Process instruction operands to evaluate const expressions - processed_operands = [] - for op in instruction.operands: - if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): - # This is a const expression - evaluate it - if isinstance(op, str) and op.startswith("@"): - # This is a label reference that came from const_atom - # Convert it back to IRLabel - label_name = op[1:] - processed_operands.append(IRLabel(label_name, True)) - else: - # Use try_evaluate to handle undefined constants - processed_operands.append(self._try_evaluate_const_expr(op, ctx)) - else: - processed_operands.append(op) - # Create new instruction with evaluated operands - new_inst = IRInstruction( - instruction.opcode, processed_operands, output=instruction.output - ) - new_inst.ast_source = instruction.ast_source - new_inst.error_msg = instruction.error_msg - bb.insert_instruction(new_inst) + bb.insert_instruction(instruction) _set_last_var(fn) @@ -384,6 +362,8 @@ def operand(self, children) -> IROperand: hex_content = operand.removeprefix('x"').removesuffix('"') hex_content = hex_content.replace("_", "") return IRHexString(bytes.fromhex(hex_content)) + elif isinstance(operand, (str, tuple)) and not isinstance(operand, IROperand): + return IRConstExpr(operand) return operand def func_name(self, children) -> str: From 981a9b81fd01d5f9f42cdd891d462af3c2ce6cf0 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 4 Jul 2025 18:10:11 +0300 Subject: [PATCH 141/172] wip --- vyper/venom/__init__.py | 29 ++++++++++++++++++++++++++++- vyper/venom/basicblock.py | 4 ++++ vyper/venom/context.py | 6 ++++++ vyper/venom/parser.py | 3 --- vyper/venom/venom_to_assembly.py | 13 +++++++++++++ 5 files changed, 51 insertions(+), 4 deletions(-) diff --git a/vyper/venom/__init__.py 
b/vyper/venom/__init__.py index 9135714c4e..0a4cb2b7b8 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -10,7 +10,8 @@ from vyper.exceptions import CompilerPanic from vyper.venom.analysis import MemSSA from vyper.venom.analysis.analysis import IRAnalysesCache -from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral +from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral, IROperand +from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import DataSection, IRContext from vyper.venom.function import IRFunction from vyper.venom.passes import ( @@ -112,11 +113,37 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel, ac: IRAnalysesCache CFGNormalization(ac, fn).run_pass() +def _resolve_const_operands(ctx: IRContext) -> None: + """Resolve raw const expressions in operands to IRLiteral or IRLabel.""" + for fn in ctx.functions.values(): + for bb in fn.get_basic_blocks(): + for inst in bb.instructions: + new_operands = [] + for op in inst.operands: + if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): + # This is a raw const expression - evaluate it + result = try_evaluate_const_expr( + op, ctx.constants, ctx.global_labels, + ctx.unresolved_consts, ctx.const_refs + ) + if isinstance(result, int): + new_operands.append(IRLiteral(result)) + else: + # Return as label for unresolved expressions + new_operands.append(IRLabel(result, True)) + else: + new_operands.append(op) + inst.operands = new_operands + + def _run_global_passes(ctx: IRContext, optimize: OptimizationLevel, ir_analyses: dict) -> None: FunctionInlinerPass(ir_analyses, ctx, optimize).run_pass() def run_passes_on(ctx: IRContext, optimize: OptimizationLevel) -> None: + # First resolve any raw const expressions in operands + _resolve_const_operands(ctx) + ir_analyses = {} for fn in ctx.functions.values(): ir_analyses[fn] = IRAnalysesCache(fn) diff --git a/vyper/venom/basicblock.py 
b/vyper/venom/basicblock.py index 023c150927..f08c666f18 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -209,6 +209,7 @@ def __repr__(self) -> str: return f'x"{self.value.hex()}"' + class IRLabel(IROperand): """ IRLabel represents a label in IR. A label is a string that starts with a %. @@ -510,6 +511,9 @@ def out_bbs(self): term = self.last_instruction if term.opcode == "db": return [] + # Only jmp, djmp, and jnz have jump targets + if term.opcode not in ("jmp", "djmp", "jnz"): + return [] out_labels = term.get_label_operands() fn = self.parent return [fn.get_basic_block(label.name) for label in out_labels] diff --git a/vyper/venom/context.py b/vyper/venom/context.py index f0d0743295..857f308a13 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -33,6 +33,7 @@ class IRContext: functions: dict[IRLabel, IRFunction] entry_function: Optional[IRFunction] constants: dict[str, int] # globally defined constants + const_expressions: dict[str, Any] # raw const expressions (unevaluated) global_labels: dict[str, int] # globally defined labels with addresses data_segment: list[DataSection] last_label: int @@ -45,6 +46,7 @@ def __init__(self) -> None: self.entry_function = None self.data_segment = [] self.constants = {} + self.const_expressions = {} self.global_labels = {} self.unresolved_consts = {} self.const_refs = set() @@ -107,6 +109,10 @@ def add_constant(self, name: str, value: int) -> None: assert name not in self.constants self.constants[name] = value + def add_const_expression(self, name: str, expr: Any) -> None: + assert name not in self.const_expressions + self.const_expressions[name] = expr + def add_global_label(self, name: str, address: int) -> None: assert name not in self.global_labels self.global_labels[name] = address diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index ec4f1b462c..b541d69a9a 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -5,7 +5,6 @@ from vyper.venom.basicblock import 
( IRBasicBlock, - IRConstExpr, IRHexString, IRInstruction, IRLabel, @@ -362,8 +361,6 @@ def operand(self, children) -> IROperand: hex_content = operand.removeprefix('x"').removesuffix('"') hex_content = hex_content.replace("_", "") return IRHexString(bytes.fromhex(hex_content)) - elif isinstance(operand, (str, tuple)) and not isinstance(operand, IROperand): - return IRConstExpr(operand) return operand def func_name(self, children) -> str: diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 670a998a9c..ab2b7c88a7 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -29,6 +29,7 @@ IROperand, IRVariable, ) +from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext, IRFunction from vyper.venom.stack_model import StackModel @@ -165,6 +166,18 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self.visited_basicblocks = OrderedSet() self.label_counter = 0 + # Evaluate const expressions and populate constants + for name, expr in self.ctx.const_expressions.items(): + result = try_evaluate_const_expr( + expr, self.ctx.constants, self.ctx.global_labels, + self.ctx.unresolved_consts, self.ctx.const_refs + ) + if isinstance(result, int): + self.ctx.constants[name] = result + else: + # Store as unresolved constant + self.ctx.unresolved_consts[name] = expr + asm: list[AssemblyInstruction] = [] # Add global variables to the assembly From 539dc656a68812dacf3413d7c766a08ee8808885 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 4 Jul 2025 19:28:03 +0300 Subject: [PATCH 142/172] wip --- vyper/evm/assembler/optimizer.py | 12 ++++++++++++ vyper/evm/assembler/symbols.py | 23 +++++++++++++++-------- vyper/venom/venom_to_assembly.py | 10 ++++++++-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index 4752552cea..e563ceca07 100644 --- 
a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -2,11 +2,13 @@ from vyper.evm.assembler.core import ( DATA_ITEM, JUMPDEST, + PUSH_OFST, PUSHLABEL, PUSHLABELJUMPDEST, Label, is_symbol, ) +from vyper.evm.assembler.symbols import CONSTREF, BaseConstOp from vyper.exceptions import CompilerPanic _TERMINAL_OPS = ("JUMP", "RETURN", "REVERT", "STOP", "INVALID") @@ -204,6 +206,16 @@ def _prune_unused_jumpdests(assembly): # add symbols used in data sections as they are likely # used for a jumptable. used_as_jumpdests.add(item.data) + + # Track labels referenced through CONSTREF + if isinstance(item, PUSH_OFST) and isinstance(item.label, CONSTREF): + used_as_labels.add(Label(item.label.label)) + + # Track labels in BaseConstOp operations (CONST_ADD, CONST_SUB, etc.) + if isinstance(item, BaseConstOp): + for operand in [item.op1, item.op2]: + if isinstance(operand, str): + used_as_labels.add(Label(operand)) # delete jumpdests that aren't used i = 0 diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py index fb79ef0714..fc17a3b1ea 100644 --- a/vyper/evm/assembler/symbols.py +++ b/vyper/evm/assembler/symbols.py @@ -67,14 +67,21 @@ def __eq__(self, other): def _resolve_operand(self, operand: str | int, symbol_map: dict[SymbolKey, int]) -> int | None: if isinstance(operand, str): - # Try as CONSTREF first - op_ref = CONSTREF(operand) - if op_ref in symbol_map: - return symbol_map[op_ref] - # Try as Label - label = Label(operand) - if label in symbol_map: - return symbol_map[label] + # Handle @ prefix for label references + if operand.startswith("@"): + label_name = operand[1:] + label = Label(label_name) + if label in symbol_map: + return symbol_map[label] + else: + # Try as CONSTREF first + op_ref = CONSTREF(operand) + if op_ref in symbol_map: + return symbol_map[op_ref] + # Try as Label + label = Label(operand) + if label in symbol_map: + return symbol_map[label] elif isinstance(operand, int): return operand return None diff 
--git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index ab2b7c88a7..2c30cc286d 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -291,8 +291,14 @@ def _emit_input_operands( if inst.opcode != "invoke": # Check if this label is an unresolved constant if op.value in self.ctx.unresolved_consts: - # Use PUSH_OFST with CONSTREF for unresolved constants - assembly.append(PUSH_OFST(CONSTREF(op.value), 0)) + expr = self.ctx.unresolved_consts[op.value] + # Check if it's a simple reference (not a real constant) + if isinstance(expr, tuple) and len(expr) == 2 and expr[0] == "ref": + # Simple label reference - use PUSHLABEL + assembly.append(PUSHLABEL(_as_asm_symbol(op))) + else: + # Real unresolved constant - use PUSH_OFST with CONSTREF + assembly.append(PUSH_OFST(CONSTREF(op.value), 0)) else: assembly.append(PUSHLABEL(_as_asm_symbol(op))) stack.push(op) From d2a91dc3fd9999a6869fa0c737827c3c8acc37a8 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 7 Jul 2025 14:49:54 +0300 Subject: [PATCH 143/172] wip, lint, test passing --- tests/unit/compiler/asm/test_assembler.py | 10 +---- tests/unit/venom/test_const_expressions.py | 51 ++++++++++++++++++---- tests/venom_utils.py | 7 +++ vyper/evm/assembler/core.py | 3 +- vyper/evm/assembler/optimizer.py | 4 +- vyper/venom/__init__.py | 17 ++++++-- vyper/venom/basicblock.py | 1 - vyper/venom/const_eval.py | 37 +++++++++++----- vyper/venom/parser.py | 12 +++-- vyper/venom/venom_to_assembly.py | 44 ++++++++++++++----- 10 files changed, 136 insertions(+), 50 deletions(-) diff --git a/tests/unit/compiler/asm/test_assembler.py b/tests/unit/compiler/asm/test_assembler.py index cc0c3eb95d..6d48ccdf87 100644 --- a/tests/unit/compiler/asm/test_assembler.py +++ b/tests/unit/compiler/asm/test_assembler.py @@ -1,12 +1,6 @@ import pytest -from vyper.evm.assembler.core import ( - PUSH_OFST, - _assembly_to_evm, - _resolve_constants, - assembly_to_evm, - resolve_symbols, -) +from 
vyper.evm.assembler.core import PUSH_OFST, _resolve_constants, assembly_to_evm, resolve_symbols from vyper.evm.assembler.symbols import CONST, CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF, Label from vyper.exceptions import CompilerPanic @@ -64,7 +58,6 @@ def test_const_add_with_label(): assert len(bytecode) > 0 - def test_const_add_with_label_overflow(): # Create assembly with a large offset that will overflow when added to a label asm = [ @@ -80,4 +73,3 @@ def test_const_add_with_label_overflow(): resolve_symbols(asm) assert "will_overflow" in str(exc_info.value) - diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index d1af150554..520b4eebbe 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -2,6 +2,7 @@ from vyper.evm.assembler.core import assembly_to_evm from vyper.evm.assembler.symbols import CONST, CONST_ADD, Label +from vyper.venom import _resolve_const_operands from vyper.venom.basicblock import IRLabel, IRLiteral from vyper.venom.const_eval import ConstEvalException, evaluate_const_expr, try_evaluate_const_expr from vyper.venom.parser import parse_venom @@ -83,6 +84,11 @@ def test_venom_const_definitions(): ctx = parse_venom(code) + # Evaluate const expressions (this would normally happen during compilation) + _resolve_const_operands(ctx) + compiler = VenomCompiler(ctx) + compiler.generate_evm_assembly(no_optimize=True) + # Check constants assert ctx.constants["SLOT_SIZE"] == 32 assert ctx.constants["BASE_ADDR"] == 0x1000 @@ -112,6 +118,14 @@ def test_venom_label_addresses(): ctx = parse_venom(code) + # Evaluate const expressions (this would normally happen during compilation) + from vyper.venom import _resolve_const_operands + from vyper.venom.venom_to_assembly import VenomCompiler + + _resolve_const_operands(ctx) + compiler = VenomCompiler(ctx) + compiler.generate_evm_assembly(no_optimize=True) + # Check global labels assert 
ctx.global_labels["data_label"] == 0x1000 assert ctx.global_labels["computed_label"] == 0x1020 @@ -149,6 +163,12 @@ def test_venom_instruction_operands(): """ ctx = parse_venom(code) + + # Evaluate const expressions (this would normally happen during compilation) + _resolve_const_operands(ctx) + compiler = VenomCompiler(ctx) + compiler.generate_evm_assembly(no_optimize=True) + fn = ctx.entry_function bb = fn.get_basic_block("entry") @@ -180,22 +200,22 @@ def test_venom_complex_example(): array_end: add(@array_data, mul($WORD_SIZE, 10)) function process_array { - loop_start: + entry: %ptr = mload 0 %val = mload %ptr %next_ptr = add %ptr, $WORD_SIZE mstore 0, %next_ptr - %done = ge %next_ptr, @array_end - %should_continue = iszero %done - jnz %should_continue, @loop_start, @finish - - finish: return %ptr, %val } """ ctx = parse_venom(code) + # Evaluate const expressions (this would normally happen during compilation) + _resolve_const_operands(ctx) + compiler = VenomCompiler(ctx) + compiler.generate_evm_assembly(no_optimize=True) + # Check computed constants assert ctx.constants["ARRAY_OFFSET"] == 160 assert ctx.constants["DATA_START"] == 224 @@ -206,7 +226,7 @@ def test_venom_complex_example(): # Check instruction operands fn = ctx.get_function(ctx.functions[list(ctx.functions.keys())[0]].name) - bb = fn.get_basic_block("loop_start") + bb = fn.get_basic_block("entry") # Find the add instruction add_inst = None @@ -291,6 +311,9 @@ def test_venom_with_undefined_constants(): ctx = parse_venom(code) + # Evaluate const expressions (this would normally happen during compilation) + _resolve_const_operands(ctx) + # Check that defined constant is resolved assert ctx.constants["A"] == 100 @@ -335,6 +358,9 @@ def test_venom_undefined_in_instruction_operands(): ctx = parse_venom(code) + # Evaluate const expressions (this would normally happen during compilation) + _resolve_const_operands(ctx) + # Check that undefined constants are tracked assert len(ctx.const_refs) > 0 assert 
"UNDEFINED_OFFSET" in ctx.const_refs or len(ctx.unresolved_consts) > 0 @@ -372,6 +398,9 @@ def test_complex_undefined_chain(): ctx = parse_venom(code) + # Evaluate const expressions (this would normally happen during compilation) + _resolve_const_operands(ctx) + # Should track multiple undefined constants assert len(ctx.const_refs) >= 2 or len(ctx.unresolved_consts) >= 2 @@ -400,6 +429,9 @@ def test_undefined_const_end_to_end(): ctx = parse_venom(code) + # Evaluate const expressions (this would normally happen during compilation) + _resolve_const_operands(ctx) + assert len(ctx.const_refs) >= 1 assert "UNDEFINED_X" in ctx.const_refs @@ -529,6 +561,7 @@ def test_label_dependent_const_example(): bytecode, _ = assembly_to_evm(asm) assert len(bytecode) > 0 + def test_undefined_const_label_expression_linking_example_2(): code = """ const SLOT_SIZE = 32 @@ -543,10 +576,10 @@ def test_undefined_const_label_expression_linking_example_2(): """ ctx = parse_venom(code) - + compiler = VenomCompiler(ctx) asm = compiler.generate_evm_assembly(no_optimize=True) # Compile to bytecode bytecode, _ = assembly_to_evm(asm) - assert len(bytecode) > 0 \ No newline at end of file + assert len(bytecode) > 0 diff --git a/tests/venom_utils.py b/tests/venom_utils.py index 7ebef8d8af..66c37babf1 100644 --- a/tests/venom_utils.py +++ b/tests/venom_utils.py @@ -1,3 +1,4 @@ +from vyper.venom import _resolve_const_operands from vyper.venom.analysis import IRAnalysesCache from vyper.venom.basicblock import IRBasicBlock, IRInstruction from vyper.venom.context import IRContext @@ -74,6 +75,9 @@ def __call__(self, pre: str, post: str, hevm: bool | None = None) -> list[IRPass hevm = self.default_hevm pre_ctx = parse_from_basic_block(pre) + # Resolve const expressions before running passes + _resolve_const_operands(pre_ctx) + for fn in pre_ctx.functions.values(): ac = IRAnalysesCache(fn) for p in self.passes: @@ -82,6 +86,9 @@ def __call__(self, pre: str, post: str, hevm: bool | None = None) -> 
list[IRPass obj.run_pass() post_ctx = parse_from_basic_block(post) + # Resolve const expressions before running passes + _resolve_const_operands(post_ctx) + for fn in post_ctx.functions.values(): ac = IRAnalysesCache(fn) for p in self.post_passes: diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 6a0cb4a450..149944b231 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -291,7 +291,8 @@ def _resolve_label_dependent_constants( # Check overflow for label-dependent constants if item.name in label_dependent_consts and value > max_push2_value: raise CompilerPanic( - f"Label-dependent constant '{item.name}' has value {value} (constants involving labels must fit in PUSH2 instructions) " + f"Label-dependent constant '{item.name}' has value {value} " + "(constants involving labels must fit in PUSH2 instructions)" ) _add_to_symbol_map(symbol_map, const_ref, value) changed = True diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index e563ceca07..a68714d755 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -206,11 +206,11 @@ def _prune_unused_jumpdests(assembly): # add symbols used in data sections as they are likely # used for a jumptable. used_as_jumpdests.add(item.data) - + # Track labels referenced through CONSTREF if isinstance(item, PUSH_OFST) and isinstance(item.label, CONSTREF): used_as_labels.add(Label(item.label.label)) - + # Track labels in BaseConstOp operations (CONST_ADD, CONST_SUB, etc.) 
if isinstance(item, BaseConstOp): for operand in [item.op1, item.op2]: diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 0a4cb2b7b8..9299237740 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -115,6 +115,14 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel, ac: IRAnalysesCache def _resolve_const_operands(ctx: IRContext) -> None: """Resolve raw const expressions in operands to IRLiteral or IRLabel.""" + # First evaluate simple const expressions to populate ctx.constants + for name, expr in ctx.const_expressions.items(): + if isinstance(expr, (int, IRLiteral)): + # Simple literal + value = expr if isinstance(expr, int) else expr.value + ctx.constants[name] = value + + # Now resolve operands for fn in ctx.functions.values(): for bb in fn.get_basic_blocks(): for inst in bb.instructions: @@ -123,8 +131,11 @@ def _resolve_const_operands(ctx: IRContext) -> None: if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): # This is a raw const expression - evaluate it result = try_evaluate_const_expr( - op, ctx.constants, ctx.global_labels, - ctx.unresolved_consts, ctx.const_refs + op, + ctx.constants, + ctx.global_labels, + ctx.unresolved_consts, + ctx.const_refs, ) if isinstance(result, int): new_operands.append(IRLiteral(result)) @@ -143,7 +154,7 @@ def _run_global_passes(ctx: IRContext, optimize: OptimizationLevel, ir_analyses: def run_passes_on(ctx: IRContext, optimize: OptimizationLevel) -> None: # First resolve any raw const expressions in operands _resolve_const_operands(ctx) - + ir_analyses = {} for fn in ctx.functions.values(): ir_analyses[fn] = IRAnalysesCache(fn) diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index f08c666f18..29c9b77a1f 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -209,7 +209,6 @@ def __repr__(self) -> str: return f'x"{self.value.hex()}"' - class IRLabel(IROperand): """ IRLabel represents a label in IR. 
A label is a string that starts with a %. diff --git a/vyper/venom/const_eval.py b/vyper/venom/const_eval.py index 7b4e318eea..30474a8514 100644 --- a/vyper/venom/const_eval.py +++ b/vyper/venom/const_eval.py @@ -10,7 +10,7 @@ from typing import Any, Union from vyper.exceptions import CompilerPanic -from vyper.venom.basicblock import IRLiteral +from vyper.venom.basicblock import IRLabel, IRLiteral class ConstEvalException(CompilerPanic): @@ -36,6 +36,13 @@ def evaluate_const_expr(expr: Any, constants: dict[str, int], global_labels: dic if isinstance(expr, IRLiteral): return expr.value + if isinstance(expr, IRLabel): + # Handle IRLabel objects + label_name = expr.value + if label_name not in global_labels: + raise ConstEvalException(f"Undefined global label: {label_name}") + return global_labels[label_name] + if isinstance(expr, str): # Check if it's a constant reference ($NAME) if expr.startswith("$"): @@ -94,9 +101,6 @@ def try_evaluate_const_expr( unresolved_consts: dict[str, Any], const_refs: set[str], ) -> Union[int, str]: - # Import here to avoid circular imports - from vyper.venom.basicblock import IRLiteral - # Handle simple cases first if isinstance(expr, int): return expr @@ -104,6 +108,19 @@ def try_evaluate_const_expr( if isinstance(expr, IRLiteral): return expr.value + if isinstance(expr, IRLabel): + # Handle IRLabel objects + label_name = expr.value + if label_name in global_labels: + # Label is already defined, return its value + return global_labels[label_name] + else: + # Label is unresolved + const_refs.add(label_name) + if label_name not in unresolved_consts: + unresolved_consts[label_name] = ("ref", label_name) + return label_name + if isinstance(expr, str): # Check if it's a constant reference ($NAME) if expr.startswith("$"): @@ -119,13 +136,11 @@ def try_evaluate_const_expr( # Check if it's a label reference (@NAME) if expr.startswith("@"): label_name = expr[1:] - if label_name not in global_labels: - # Treat undefined labels like undefined 
constants - const_refs.add(label_name) - if label_name not in unresolved_consts: - unresolved_consts[label_name] = ("ref", label_name) - return label_name - return global_labels[label_name] + # Always treat label references as unresolved so they remain as labels + const_refs.add(label_name) + if label_name not in unresolved_consts: + unresolved_consts[label_name] = ("ref", label_name) + return label_name # Otherwise it might be a plain identifier raise ConstEvalException(f"Invalid constant expression: {expr}") diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index b541d69a9a..caaf54f846 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -162,7 +162,13 @@ def start(self, children) -> IRContext: # Process global labels for global_label in global_labels: name, expr = global_label.children - ctx.add_global_label(name, 0) + # For simple literals, we can evaluate immediately + if isinstance(expr, IRLiteral): + ctx.add_global_label(name, expr.value) + else: + # For complex expressions, store for later evaluation + ctx.add_global_label(name, 0) + ctx.add_const_expression(f"_global_label_{name}", expr) # Process functions for fn_name, items in funcs: @@ -411,8 +417,8 @@ def const_atom(self, children): if isinstance(child, IRLiteral): return child elif isinstance(child, IRLabel): - # Return as a label reference to be evaluated later - return f"@{child.value}" + # Return the IRLabel directly - no @ prefix needed + return child else: # Must be a const_ref (string starting with $) return child diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 2c30cc286d..0e5dd91d50 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -17,6 +17,7 @@ from vyper.evm.assembler.symbols import CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.utils import MemoryPositions, OrderedSet, wrap256 +from vyper.venom import _resolve_const_operands from 
vyper.venom.analysis import CFGAnalysis, DFGAnalysis, IRAnalysesCache, LivenessAnalysis from vyper.venom.basicblock import ( PSEUDO_INSTRUCTION, @@ -166,11 +167,17 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self.visited_basicblocks = OrderedSet() self.label_counter = 0 + # Resolve any raw const expressions in operands first + _resolve_const_operands(self.ctx) + # Evaluate const expressions and populate constants for name, expr in self.ctx.const_expressions.items(): result = try_evaluate_const_expr( - expr, self.ctx.constants, self.ctx.global_labels, - self.ctx.unresolved_consts, self.ctx.const_refs + expr, + self.ctx.constants, + self.ctx.global_labels, + self.ctx.unresolved_consts, + self.ctx.const_refs, ) if isinstance(result, int): self.ctx.constants[name] = result @@ -178,6 +185,20 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr # Store as unresolved constant self.ctx.unresolved_consts[name] = expr + # Process global label expressions that were stored separately + for name, expr in list(self.ctx.const_expressions.items()): + if name.startswith("_global_label_"): + label_name = name[len("_global_label_") :] + result = try_evaluate_const_expr( + expr, + self.ctx.constants, + self.ctx.global_labels, + self.ctx.unresolved_consts, + self.ctx.const_refs, + ) + if isinstance(result, int): + self.ctx.global_labels[label_name] = result + asm: list[AssemblyInstruction] = [] # Add global variables to the assembly @@ -193,6 +214,11 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr elif isinstance(expr, tuple) and len(expr) == 3: # Binary operation op_name, arg1, arg2 = expr + # Convert IRLabel objects to strings for assembler + if isinstance(arg1, IRLabel): + arg1 = arg1.value + if isinstance(arg2, IRLabel): + arg2 = arg2.value # Emit the appropriate CONST_* operation if op_name == "add": asm.append(CONST_ADD(label_name, arg1, arg2)) # type: ignore[arg-type] @@ 
-205,7 +231,7 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr # Auto-detect labels used in const expressions and mark their blocks for emission for fn in self.ctx.functions.values(): for bb in fn.get_basic_blocks(): - for label_name, expr in self.ctx.unresolved_consts.items(): + for _label_name, expr in self.ctx.unresolved_consts.items(): if isinstance(expr, tuple) and len(expr) == 3: _, arg1, arg2 = expr if arg1 == bb.label.value or arg2 == bb.label.value: @@ -291,14 +317,10 @@ def _emit_input_operands( if inst.opcode != "invoke": # Check if this label is an unresolved constant if op.value in self.ctx.unresolved_consts: - expr = self.ctx.unresolved_consts[op.value] - # Check if it's a simple reference (not a real constant) - if isinstance(expr, tuple) and len(expr) == 2 and expr[0] == "ref": - # Simple label reference - use PUSHLABEL - assembly.append(PUSHLABEL(_as_asm_symbol(op))) - else: - # Real unresolved constant - use PUSH_OFST with CONSTREF - assembly.append(PUSH_OFST(CONSTREF(op.value), 0)) + # For all unresolved constants, use PUSH_OFST with CONSTREF + # This ensures consistent handling whether they're simple refs or + # expressions + assembly.append(PUSH_OFST(CONSTREF(op.value), 0)) else: assembly.append(PUSHLABEL(_as_asm_symbol(op))) stack.push(op) From 6b9e3ee2a3164516dc3de1c5002def77743d7f0c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 8 Jul 2025 13:28:36 +0300 Subject: [PATCH 144/172] refactor --- tests/unit/venom/test_const_expressions.py | 21 +++++------- tests/venom_utils.py | 6 ++-- vyper/venom/__init__.py | 38 ++-------------------- vyper/venom/context.py | 15 ++++++++- vyper/venom/resolve_const.py | 37 +++++++++++++++++++++ vyper/venom/venom_to_assembly.py | 4 +-- 6 files changed, 67 insertions(+), 54 deletions(-) create mode 100644 vyper/venom/resolve_const.py diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index 520b4eebbe..d125d6008e 100644 
--- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -2,10 +2,10 @@ from vyper.evm.assembler.core import assembly_to_evm from vyper.evm.assembler.symbols import CONST, CONST_ADD, Label -from vyper.venom import _resolve_const_operands from vyper.venom.basicblock import IRLabel, IRLiteral from vyper.venom.const_eval import ConstEvalException, evaluate_const_expr, try_evaluate_const_expr from vyper.venom.parser import parse_venom +from vyper.venom.resolve_const import resolve_const_operands from vyper.venom.venom_to_assembly import VenomCompiler @@ -85,7 +85,7 @@ def test_venom_const_definitions(): ctx = parse_venom(code) # Evaluate const expressions (this would normally happen during compilation) - _resolve_const_operands(ctx) + resolve_const_operands(ctx) compiler = VenomCompiler(ctx) compiler.generate_evm_assembly(no_optimize=True) @@ -119,10 +119,7 @@ def test_venom_label_addresses(): ctx = parse_venom(code) # Evaluate const expressions (this would normally happen during compilation) - from vyper.venom import _resolve_const_operands - from vyper.venom.venom_to_assembly import VenomCompiler - - _resolve_const_operands(ctx) + resolve_const_operands(ctx) compiler = VenomCompiler(ctx) compiler.generate_evm_assembly(no_optimize=True) @@ -165,7 +162,7 @@ def test_venom_instruction_operands(): ctx = parse_venom(code) # Evaluate const expressions (this would normally happen during compilation) - _resolve_const_operands(ctx) + resolve_const_operands(ctx) compiler = VenomCompiler(ctx) compiler.generate_evm_assembly(no_optimize=True) @@ -212,7 +209,7 @@ def test_venom_complex_example(): ctx = parse_venom(code) # Evaluate const expressions (this would normally happen during compilation) - _resolve_const_operands(ctx) + resolve_const_operands(ctx) compiler = VenomCompiler(ctx) compiler.generate_evm_assembly(no_optimize=True) @@ -312,7 +309,7 @@ def test_venom_with_undefined_constants(): ctx = parse_venom(code) # Evaluate const 
expressions (this would normally happen during compilation) - _resolve_const_operands(ctx) + resolve_const_operands(ctx) # Check that defined constant is resolved assert ctx.constants["A"] == 100 @@ -359,7 +356,7 @@ def test_venom_undefined_in_instruction_operands(): ctx = parse_venom(code) # Evaluate const expressions (this would normally happen during compilation) - _resolve_const_operands(ctx) + resolve_const_operands(ctx) # Check that undefined constants are tracked assert len(ctx.const_refs) > 0 @@ -399,7 +396,7 @@ def test_complex_undefined_chain(): ctx = parse_venom(code) # Evaluate const expressions (this would normally happen during compilation) - _resolve_const_operands(ctx) + resolve_const_operands(ctx) # Should track multiple undefined constants assert len(ctx.const_refs) >= 2 or len(ctx.unresolved_consts) >= 2 @@ -430,7 +427,7 @@ def test_undefined_const_end_to_end(): ctx = parse_venom(code) # Evaluate const expressions (this would normally happen during compilation) - _resolve_const_operands(ctx) + resolve_const_operands(ctx) assert len(ctx.const_refs) >= 1 assert "UNDEFINED_X" in ctx.const_refs diff --git a/tests/venom_utils.py b/tests/venom_utils.py index 66c37babf1..38c896d279 100644 --- a/tests/venom_utils.py +++ b/tests/venom_utils.py @@ -1,10 +1,10 @@ -from vyper.venom import _resolve_const_operands from vyper.venom.analysis import IRAnalysesCache from vyper.venom.basicblock import IRBasicBlock, IRInstruction from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.parser import parse_venom from vyper.venom.passes.base_pass import IRPass +from vyper.venom.resolve_const import resolve_const_operands def parse_from_basic_block(source: str, funcname="_global"): @@ -76,7 +76,7 @@ def __call__(self, pre: str, post: str, hevm: bool | None = None) -> list[IRPass pre_ctx = parse_from_basic_block(pre) # Resolve const expressions before running passes - _resolve_const_operands(pre_ctx) + 
resolve_const_operands(pre_ctx) for fn in pre_ctx.functions.values(): ac = IRAnalysesCache(fn) @@ -87,7 +87,7 @@ def __call__(self, pre: str, post: str, hevm: bool | None = None) -> list[IRPass post_ctx = parse_from_basic_block(post) # Resolve const expressions before running passes - _resolve_const_operands(post_ctx) + resolve_const_operands(post_ctx) for fn in post_ctx.functions.values(): ac = IRAnalysesCache(fn) diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 9299237740..b7a20d9939 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -11,7 +11,6 @@ from vyper.venom.analysis import MemSSA from vyper.venom.analysis.analysis import IRAnalysesCache from vyper.venom.basicblock import IRBasicBlock, IRHexString, IRLabel, IRLiteral, IROperand -from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import DataSection, IRContext from vyper.venom.function import IRFunction from vyper.venom.passes import ( @@ -37,6 +36,7 @@ SingleUseExpansion, ) from vyper.venom.passes.dead_store_elimination import DeadStoreElimination +from vyper.venom.resolve_const import resolve_const_operands from vyper.venom.venom_to_assembly import VenomCompiler DEFAULT_OPT_LEVEL = OptimizationLevel.default() @@ -113,47 +113,13 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel, ac: IRAnalysesCache CFGNormalization(ac, fn).run_pass() -def _resolve_const_operands(ctx: IRContext) -> None: - """Resolve raw const expressions in operands to IRLiteral or IRLabel.""" - # First evaluate simple const expressions to populate ctx.constants - for name, expr in ctx.const_expressions.items(): - if isinstance(expr, (int, IRLiteral)): - # Simple literal - value = expr if isinstance(expr, int) else expr.value - ctx.constants[name] = value - - # Now resolve operands - for fn in ctx.functions.values(): - for bb in fn.get_basic_blocks(): - for inst in bb.instructions: - new_operands = [] - for op in inst.operands: - if isinstance(op, (str, 
tuple)) and not isinstance(op, IROperand): - # This is a raw const expression - evaluate it - result = try_evaluate_const_expr( - op, - ctx.constants, - ctx.global_labels, - ctx.unresolved_consts, - ctx.const_refs, - ) - if isinstance(result, int): - new_operands.append(IRLiteral(result)) - else: - # Return as label for unresolved expressions - new_operands.append(IRLabel(result, True)) - else: - new_operands.append(op) - inst.operands = new_operands - - def _run_global_passes(ctx: IRContext, optimize: OptimizationLevel, ir_analyses: dict) -> None: FunctionInlinerPass(ir_analyses, ctx, optimize).run_pass() def run_passes_on(ctx: IRContext, optimize: OptimizationLevel) -> None: # First resolve any raw const expressions in operands - _resolve_const_operands(ctx) + resolve_const_operands(ctx) ir_analyses = {} for fn in ctx.functions.values(): diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 857f308a13..05c4ed8298 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -127,8 +127,21 @@ def as_graph(self) -> str: def __repr__(self) -> str: s = [] + + # Print const expressions first + for name, expr in self.const_expressions.items(): + if isinstance(expr, tuple) and len(expr) == 3: + # Format operation expressions + op, arg1, arg2 = expr + s.append(f"const {name} = {op}({arg1}, {arg2})") + else: + s.append(f"const {name} = {expr}") + + if self.const_expressions: + s.append("") + for fn in self.functions.values(): s.append(IRFunction.__repr__(fn)) - s.append("\n") + s.append("") return "\n".join(s) diff --git a/vyper/venom/resolve_const.py b/vyper/venom/resolve_const.py new file mode 100644 index 0000000000..78f9dcc7bf --- /dev/null +++ b/vyper/venom/resolve_const.py @@ -0,0 +1,37 @@ +from vyper.venom.basicblock import IRLabel, IRLiteral, IROperand +from vyper.venom.const_eval import try_evaluate_const_expr +from vyper.venom.context import IRContext + + +def resolve_const_operands(ctx: IRContext) -> None: + """Resolve raw const expressions 
in operands to IRLiteral or IRLabel.""" + # First evaluate simple const expressions to populate ctx.constants + for name, expr in ctx.const_expressions.items(): + if isinstance(expr, (int, IRLiteral)): + # Simple literal + value = expr if isinstance(expr, int) else expr.value + ctx.constants[name] = value + + # Now resolve operands + for fn in ctx.functions.values(): + for bb in fn.get_basic_blocks(): + for inst in bb.instructions: + new_operands = [] + for op in inst.operands: + if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): + # This is a raw const expression - evaluate it + result = try_evaluate_const_expr( + op, + ctx.constants, + ctx.global_labels, + ctx.unresolved_consts, + ctx.const_refs, + ) + if isinstance(result, int): + new_operands.append(IRLiteral(result)) + else: + # Return as label for unresolved expressions + new_operands.append(IRLabel(result, True)) + else: + new_operands.append(op) + inst.operands = new_operands diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 0e5dd91d50..62948e7509 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -17,7 +17,6 @@ from vyper.evm.assembler.symbols import CONST_ADD, CONST_MAX, CONST_SUB, CONSTREF from vyper.exceptions import CompilerPanic, StackTooDeep from vyper.utils import MemoryPositions, OrderedSet, wrap256 -from vyper.venom import _resolve_const_operands from vyper.venom.analysis import CFGAnalysis, DFGAnalysis, IRAnalysesCache, LivenessAnalysis from vyper.venom.basicblock import ( PSEUDO_INSTRUCTION, @@ -32,6 +31,7 @@ ) from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext, IRFunction +from vyper.venom.resolve_const import resolve_const_operands from vyper.venom.stack_model import StackModel DEBUG_SHOW_COST = False @@ -168,7 +168,7 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr self.label_counter = 0 # Resolve any raw const 
expressions in operands first - _resolve_const_operands(self.ctx) + resolve_const_operands(self.ctx) # Evaluate const expressions and populate constants for name, expr in self.ctx.const_expressions.items(): From 8f266bbab8bc78fe3d3ca6f678f3fd94cf4cf371 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 9 Jul 2025 13:12:29 +0300 Subject: [PATCH 145/172] don't emit empty data_item --- vyper/venom/venom_to_assembly.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 62948e7509..28d675094b 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -584,7 +584,8 @@ def _generate_evm_for_instruction( if isinstance(data_operand, IRLabel): assembly.append(DATA_ITEM(_as_asm_symbol(data_operand))) elif isinstance(data_operand, IRHexString): - assembly.append(DATA_ITEM(data_operand.value)) + if len(data_operand.value) > 0: + assembly.append(DATA_ITEM(data_operand.value)) else: raise Exception(f"Unsupported db operand type: {type(data_operand)}") elif opcode == "jnz": From 6afe95ea213712ad667fa6add36023800ce5a411 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 9 Jul 2025 14:20:22 +0300 Subject: [PATCH 146/172] refactor --- vyper/evm/assembler/constants.py | 4 ++++ vyper/evm/assembler/core.py | 5 +---- vyper/venom/context.py | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/vyper/evm/assembler/constants.py b/vyper/evm/assembler/constants.py index daafcb97ab..e6c78c288f 100644 --- a/vyper/evm/assembler/constants.py +++ b/vyper/evm/assembler/constants.py @@ -1,2 +1,6 @@ # Commutative operations in EVM COMMUTATIVE_OPS = {"ADD", "MUL", "EQ", "AND", "OR", "XOR"} + +PUSH_OFFSET = 0x5F +DUP_OFFSET = 0x7F +SWAP_OFFSET = 0x8F diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 149944b231..60e6efbe08 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -1,14 +1,11 @@ from typing 
import Any +from vyper.evm.assembler.constants import DUP_OFFSET, PUSH_OFFSET, SWAP_OFFSET from vyper.evm.assembler.symbols import CONST, CONSTREF, BaseConstOp, Label, SymbolKey from vyper.evm.opcodes import get_opcodes, version_check from vyper.exceptions import CompilerPanic from vyper.utils import OrderedSet -PUSH_OFFSET = 0x5F -DUP_OFFSET = 0x7F -SWAP_OFFSET = 0x8F - def num_to_bytearray(x): o = [] diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 05c4ed8298..889812664e 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -127,7 +127,7 @@ def as_graph(self) -> str: def __repr__(self) -> str: s = [] - + # Print const expressions first for name, expr in self.const_expressions.items(): if isinstance(expr, tuple) and len(expr) == 3: @@ -136,10 +136,10 @@ def __repr__(self) -> str: s.append(f"const {name} = {op}({arg1}, {arg2})") else: s.append(f"const {name} = {expr}") - + if self.const_expressions: s.append("") - + for fn in self.functions.values(): s.append(IRFunction.__repr__(fn)) s.append("") From 3686d9267672b12901ebf9d01724d350d2a004c7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 10 Jul 2025 13:41:36 +0300 Subject: [PATCH 147/172] pushsymbol wip --- .../compiler/venom/test_venom_to_assembly.py | 2 +- vyper/evm/assembler/core.py | 88 +++++++++++++++++++ vyper/venom/venom_to_assembly.py | 4 + 3 files changed, 93 insertions(+), 1 deletion(-) diff --git a/tests/unit/compiler/venom/test_venom_to_assembly.py b/tests/unit/compiler/venom/test_venom_to_assembly.py index a62b7e3e00..edf4bda972 100644 --- a/tests/unit/compiler/venom/test_venom_to_assembly.py +++ b/tests/unit/compiler/venom/test_venom_to_assembly.py @@ -1,4 +1,4 @@ -from vyper.evm.assembler.core import PUSHLABEL, Label +from vyper.evm.assembler.core import PUSHLABEL, PUSHSYMBOL, Label from vyper.venom.parser import parse_venom from vyper.venom.venom_to_assembly import VenomCompiler diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 
60e6efbe08..44919dc401 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -87,6 +87,27 @@ def __hash__(self): return hash((self.label, self.ofst)) +class PUSHSYMBOL: + """ + Universal symbol reference instruction that can handle any symbol type. + The assembler will resolve whether it's a label, constant, or const reference. + """ + def __init__(self, symbol_name: str): + assert isinstance(symbol_name, str) + self.symbol_name = symbol_name + + def __repr__(self): + return f"PUSHSYMBOL {self.symbol_name}" + + def __eq__(self, other): + if not isinstance(other, PUSHSYMBOL): + return False + return self.symbol_name == other.symbol_name + + def __hash__(self): + return hash(self.symbol_name) + + class DATA_ITEM: def __init__(self, item: bytes | Label): self.data = item @@ -378,6 +399,28 @@ def resolve_symbols( elif isinstance(item, (PUSHLABEL, PUSHLABELJUMPDEST)): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits + elif isinstance(item, PUSHSYMBOL): + # Determine symbol type and appropriate size + symbol_name = item.symbol_name + + # Strip $ prefix if present for const references + if symbol_name.startswith("$"): + const_name = symbol_name[1:] + if CONSTREF(const_name) in symbol_map: + # Check if this is a label-dependent constant + if const_name in label_dependent_consts: + pc += SYMBOL_SIZE + 1 # PUSH2 for label-dependent constants + else: + # Calculate actual size for pure constants + val = symbol_map[CONSTREF(const_name)] + pc += calc_push_size(val) + else: + # Assume it will be a label-dependent constant + pc += SYMBOL_SIZE + 1 + else: + # It's either a label or will be resolved as one + pc += SYMBOL_SIZE + 1 # PUSH2 for labels + elif isinstance(item, PUSH_OFST): assert isinstance(item.ofst, int), item # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar) @@ -614,6 +657,51 @@ def _assembly_to_evm( bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) ret.extend(bytecode) + elif isinstance(item, PUSHSYMBOL): + # Resolve 
the symbol and push appropriate value + symbol_name = item.symbol_name + + # Handle const references (starting with $) + if symbol_name.startswith("$"): + const_name = symbol_name[1:] + const_ref = CONSTREF(const_name) + if const_ref in symbol_map: + val = symbol_map[const_ref] + # Check if this is a label-dependent constant + if const_name in label_dependent_consts: + # Use PUSH2 for label-dependent constants + bytecode = _compile_push_instruction(PUSH_N(val, SYMBOL_SIZE)) + else: + # Use optimal size for pure constants + bytecode = _compile_push_instruction(PUSH(val)) + ret.extend(bytecode) + else: + raise CompilerPanic(f"Undefined constant reference: {symbol_name}") + else: + # Try as a label first + label = Label(symbol_name) + if label in symbol_map: + bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) + ret.extend(bytecode) + else: + # Try as a constant without $ prefix + const_ref = CONSTREF(symbol_name) + if const_ref in symbol_map: + val = symbol_map[const_ref] + # Check if this is a label-dependent constant + if symbol_name in label_dependent_consts: + # Use PUSH2 for label-dependent constants + bytecode = _compile_push_instruction(PUSH_N(val, SYMBOL_SIZE)) + else: + # Use optimal size for pure constants + bytecode = _compile_push_instruction(PUSH(val)) + ret.extend(bytecode) + else: + # Symbol not found. This will raise KeyError with the actual undefined symbol. + # This is the same behavior as PUSHLABEL. 
+ bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) + ret.extend(bytecode) + elif isinstance(item, JUMPDEST): jumpdest_opcode = get_opcodes()["JUMPDEST"][0] assert jumpdest_opcode is not None # help mypy diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 28d675094b..617edab0fb 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -695,6 +695,10 @@ def _optimistic_swap(self, assembly, inst, next_liveness, stack): if DEBUG_SHOW_COST: stack0 = stack.copy() + # Handle empty liveness set + # if not next_liveness: + # return + next_scheduled = next_liveness.last() cost = 0 if not self.dfg.are_equivalent(inst.output, next_scheduled): From 847117bf222e49b8686dafdccf1f2a6b9bfcd697 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 17 Jul 2025 18:42:03 +0300 Subject: [PATCH 148/172] refactor unresolved constant handling --- vyper/venom/venom_to_assembly.py | 39 ++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 617edab0fb..28e5ab5035 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -182,8 +182,35 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr if isinstance(result, int): self.ctx.constants[name] = result else: - # Store as unresolved constant - self.ctx.unresolved_consts[name] = expr + # Check if try_evaluate_const_expr already added this to unresolved_consts + # under a different name (e.g., __const_0). If so, update it to use our name. 
+ found_existing = False + + # Normalize the expression for comparison + def normalize_expr(e): + if isinstance(e, tuple) and len(e) == 3: + op, a1, a2 = e + # Strip @ prefix from label references + if isinstance(a1, str) and a1.startswith("@"): + a1 = a1[1:] + if isinstance(a2, str) and a2.startswith("@"): + a2 = a2[1:] + return (op, a1, a2) + return e + + normalized_expr = normalize_expr(expr) + + for existing_name, existing_expr in list(self.ctx.unresolved_consts.items()): + if existing_name.startswith("__const_") and normalize_expr(existing_expr) == normalized_expr: + # Remove the auto-generated name and use our explicit name + del self.ctx.unresolved_consts[existing_name] + self.ctx.unresolved_consts[name] = existing_expr + found_existing = True + break + + if not found_existing: + # Store as unresolved constant + self.ctx.unresolved_consts[name] = expr # Process global label expressions that were stored separately for name, expr in list(self.ctx.const_expressions.items()): @@ -315,9 +342,11 @@ def _emit_input_operands( # invoke emits the actual instruction itself so we don't need # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": - # Check if this label is an unresolved constant - if op.value in self.ctx.unresolved_consts: - # For all unresolved constants, use PUSH_OFST with CONSTREF + # Check if this label is a constant reference + if (op.value in self.ctx.unresolved_consts or + op.value in self.ctx.constants or + op.value in self.ctx.const_expressions): + # For all constants, use PUSH_OFST with CONSTREF # This ensures consistent handling whether they're simple refs or # expressions assembly.append(PUSH_OFST(CONSTREF(op.value), 0)) From 3c3c3e884f2deaa64efe0f1e1d3247021c16da58 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 17 Jul 2025 19:08:37 +0300 Subject: [PATCH 149/172] improve constant resolution logic --- vyper/evm/assembler/core.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 
deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 44919dc401..bd632eff0e 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -435,9 +435,14 @@ def resolve_symbols( pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits else: # For non-label-dependent constants, calculate actual size - const = symbol_map[item.label] - val = const + item.ofst - pc += calc_push_size(val) + # Try to look up as a CONSTREF first + if item.label in symbol_map: + const = symbol_map[item.label] + val = const + item.ofst + pc += calc_push_size(val) + else: + # Treat it as a label-dependent reference using PUSH2 size + pc += SYMBOL_SIZE + 1 # PUSH2 else: # pragma: nocover raise CompilerPanic(f"invalid ofst {item.label}") @@ -715,10 +720,18 @@ def _assembly_to_evm( bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) else: assert isinstance(item.label, CONSTREF) - ofst = symbol_map[item.label] + item.ofst + const_name = item.label.label + + # Try to look up as a CONSTREF first + if item.label in symbol_map: + ofst = symbol_map[item.label] + item.ofst + # If not found as CONSTREF, try as a Label + elif Label(const_name) in symbol_map: + ofst = symbol_map[Label(const_name)] + item.ofst + else: + raise CompilerPanic(f"Unknown symbol: {const_name}") # Check if this is a label-dependent constant - const_name = item.label.label if const_name in label_dependent_consts: # Use PUSH2 for label-dependent constants # Also validate the value fits in 16 bits From e42b320d241108c173fc0df06be026332b6a71db Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 17 Jul 2025 20:21:31 +0300 Subject: [PATCH 150/172] prefix refactor --- tests/unit/venom/test_const_expressions.py | 28 ++++---- vyper/venom/basicblock.py | 44 ++++++++++++- vyper/venom/const_eval.py | 74 ++++++++++------------ vyper/venom/memory_location.py | 4 +- vyper/venom/parser.py | 31 +++++---- vyper/venom/resolve_const.py | 4 +- vyper/venom/venom_to_assembly.py | 23 ++++--- 7 
files changed, 125 insertions(+), 83 deletions(-) diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index d125d6008e..e9b38ccf30 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -2,7 +2,7 @@ from vyper.evm.assembler.core import assembly_to_evm from vyper.evm.assembler.symbols import CONST, CONST_ADD, Label -from vyper.venom.basicblock import IRLabel, IRLiteral +from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, LabelRef from vyper.venom.const_eval import ConstEvalException, evaluate_const_expr, try_evaluate_const_expr from vyper.venom.parser import parse_venom from vyper.venom.resolve_const import resolve_const_operands @@ -17,12 +17,12 @@ def test_basic_const_eval(): assert evaluate_const_expr(42, constants, global_labels) == 42 # Test constant references - assert evaluate_const_expr("$A", constants, global_labels) == 10 - assert evaluate_const_expr("$B", constants, global_labels) == 20 + assert evaluate_const_expr(ConstRef("A"), constants, global_labels) == 10 + assert evaluate_const_expr(ConstRef("B"), constants, global_labels) == 20 # Test label references - assert evaluate_const_expr("@label1", constants, global_labels) == 0x100 - assert evaluate_const_expr("@label2", constants, global_labels) == 0x200 + assert evaluate_const_expr(LabelRef("label1"), constants, global_labels) == 0x100 + assert evaluate_const_expr(LabelRef("label2"), constants, global_labels) == 0x200 # Test operations assert evaluate_const_expr(("add", 10, 20), constants, global_labels) == 30 @@ -34,12 +34,12 @@ def test_basic_const_eval(): assert evaluate_const_expr(("min", 10, 20), constants, global_labels) == 10 # Test operations with references - assert evaluate_const_expr(("add", "$A", "$B"), constants, global_labels) == 30 - assert evaluate_const_expr(("add", "@label1", 0x100), constants, global_labels) == 0x200 + assert evaluate_const_expr(("add", ConstRef("A"), 
ConstRef("B")), constants, global_labels) == 30 + assert evaluate_const_expr(("add", LabelRef("label1"), 0x100), constants, global_labels) == 0x200 # Test nested operations assert evaluate_const_expr(("add", ("mul", 2, 3), 4), constants, global_labels) == 10 - assert evaluate_const_expr(("mul", ("add", "$A", 5), 2), constants, global_labels) == 30 + assert evaluate_const_expr(("mul", ("add", ConstRef("A"), 5), 2), constants, global_labels) == 30 def test_const_eval_errors(): @@ -48,11 +48,11 @@ def test_const_eval_errors(): # Test undefined constant with pytest.raises(ConstEvalException, match="Undefined constant: B"): - evaluate_const_expr("$B", constants, global_labels) + evaluate_const_expr(ConstRef("B"), constants, global_labels) # Test undefined label with pytest.raises(ConstEvalException, match="Undefined global label: label2"): - evaluate_const_expr("@label2", constants, global_labels) + evaluate_const_expr(LabelRef("label2"), constants, global_labels) # Test division by zero with pytest.raises(ConstEvalException, match="Division by zero"): @@ -244,13 +244,13 @@ def test_try_evaluate_undefined_const(): const_refs = set() # Test defined constant - returns value - result = try_evaluate_const_expr("$A", constants, global_labels, unresolved_consts, const_refs) + result = try_evaluate_const_expr(ConstRef("A"), constants, global_labels, unresolved_consts, const_refs) assert result == 10 assert len(unresolved_consts) == 0 assert len(const_refs) == 0 # Test undefined constant - returns label - result = try_evaluate_const_expr("$B", constants, global_labels, unresolved_consts, const_refs) + result = try_evaluate_const_expr(ConstRef("B"), constants, global_labels, unresolved_consts, const_refs) assert isinstance(result, str) assert result == "B" # Now uses the constant name directly assert "B" in const_refs @@ -267,7 +267,7 @@ def test_try_evaluate_undefined_in_operation(): # Operation with one undefined constant result = try_evaluate_const_expr( - ("add", "$A", 
"$B"), constants, global_labels, unresolved_consts, const_refs + ("add", ConstRef("A"), ConstRef("B")), constants, global_labels, unresolved_consts, const_refs ) assert isinstance(result, str) assert result.startswith("__const_") # Complex expressions still get generated names @@ -284,7 +284,7 @@ def test_try_evaluate_undefined_in_operation(): unresolved_consts.clear() const_refs.clear() result = try_evaluate_const_expr( - ("mul", "$B", "$C"), constants, global_labels, unresolved_consts, const_refs + ("mul", ConstRef("B"), ConstRef("C")), constants, global_labels, unresolved_consts, const_refs ) assert isinstance(result, str) assert result.startswith("__const_") diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 29c9b77a1f..288d197938 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -3,6 +3,7 @@ import json import re from contextvars import ContextVar +from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Iterator, Optional, Union import vyper.venom.effects as effects @@ -220,6 +221,7 @@ class IRLabel(IROperand): value: str address: Optional[int] = None # optional address override + def __init__(self, value: str, is_symbol: bool = False, address: Optional[int] = None) -> None: assert isinstance(value, str), f"not a str: {value} ({type(value)})" assert len(value) > 0 @@ -236,6 +238,30 @@ def __repr__(self): return json.dumps(self.value) # escape it +@dataclass(frozen=True) +class ConstRef: + """ + Reference to a named constant in Venom IR. + Replaces the $-prefixed string representation. + """ + name: str + + def __str__(self): + return f"${self.name}" + + +@dataclass(frozen=True) +class LabelRef: + """ + Reference to a label in Venom IR. + Replaces the @-prefixed string representation. + """ + name: str + + def __str__(self): + return f"@{self.name}" + + class IRInstruction: """ IRInstruction represents an instruction in IR. 
Each instruction has an opcode, @@ -432,7 +458,14 @@ def str_short(self) -> str: operands = self.operands if opcode not in ["jmp", "jnz", "djmp", "invoke"]: operands = list(reversed(operands)) - s += ", ".join([(f"@{op}" if isinstance(op, IRLabel) else str(op)) for op in operands]) + def format_operand(op): + if isinstance(op, IRLabel): + return f"@{op}" + elif isinstance(op, (ConstRef, LabelRef)): + return str(op) # Uses their __str__ methods which add prefixes + else: + return str(op) + s += ", ".join([format_operand(op) for op in operands]) return s def __repr__(self) -> str: @@ -446,7 +479,14 @@ def __repr__(self) -> str: operands = [operands[0]] + list(reversed(operands[1:])) elif self.opcode not in ("jmp", "jnz", "djmp", "phi"): operands = reversed(operands) # type: ignore - s += ", ".join([(f"@{op}" if isinstance(op, IRLabel) else str(op)) for op in operands]) + def format_operand(op): + if isinstance(op, IRLabel): + return f"@{op}" + elif isinstance(op, (ConstRef, LabelRef)): + return str(op) # Uses their __str__ methods which add prefixes + else: + return str(op) + s += ", ".join([format_operand(op) for op in operands]) if self.annotation: s = f"{s: <30} ; {self.annotation}" diff --git a/vyper/venom/const_eval.py b/vyper/venom/const_eval.py index 30474a8514..594b394fb4 100644 --- a/vyper/venom/const_eval.py +++ b/vyper/venom/const_eval.py @@ -1,16 +1,12 @@ """ Constant expression evaluator for Venom IR. 
-Supports simple expressions with function-style notation: -- Literals: 123, 0x100 -- Constant references: $CONST_NAME -- Label references: @label_name - Operations: add(a, b), sub(a, b), mul(a, b), div(a, b), mod(a, b), max(a, b), min(a, b) """ from typing import Any, Union from vyper.exceptions import CompilerPanic -from vyper.venom.basicblock import IRLabel, IRLiteral +from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, LabelRef class ConstEvalException(CompilerPanic): @@ -43,22 +39,18 @@ def evaluate_const_expr(expr: Any, constants: dict[str, int], global_labels: dic raise ConstEvalException(f"Undefined global label: {label_name}") return global_labels[label_name] - if isinstance(expr, str): - # Check if it's a constant reference ($NAME) - if expr.startswith("$"): - const_name = expr[1:] - if const_name not in constants: - raise ConstEvalException(f"Undefined constant: {const_name}") - return constants[const_name] - - # Check if it's a label reference (@NAME) - if expr.startswith("@"): - label_name = expr[1:] - if label_name not in global_labels: - raise ConstEvalException(f"Undefined global label: {label_name}") - return global_labels[label_name] + if isinstance(expr, ConstRef): + if expr.name not in constants: + raise ConstEvalException(f"Undefined constant: {expr.name}") + return constants[expr.name] - # Otherwise it might be a plain identifier (shouldn't happen in well-formed expressions) + if isinstance(expr, LabelRef): + if expr.name not in global_labels: + raise ConstEvalException(f"Undefined global label: {expr.name}") + return global_labels[expr.name] + + if isinstance(expr, str): + # String should not appear in well-formed expressions raise ConstEvalException(f"Invalid constant expression: {expr}") # Handle function-style operations @@ -121,28 +113,28 @@ def try_evaluate_const_expr( unresolved_consts[label_name] = ("ref", label_name) return label_name - if isinstance(expr, str): - # Check if it's a constant reference ($NAME) - if 
expr.startswith("$"): - const_name = expr[1:] - if const_name not in constants: - # Use the constant name directly as the label for simple references - const_refs.add(const_name) - if const_name not in unresolved_consts: - unresolved_consts[const_name] = ("ref", const_name) - return const_name - return constants[const_name] - - # Check if it's a label reference (@NAME) - if expr.startswith("@"): - label_name = expr[1:] - # Always treat label references as unresolved so they remain as labels - const_refs.add(label_name) - if label_name not in unresolved_consts: - unresolved_consts[label_name] = ("ref", label_name) - return label_name + if isinstance(expr, ConstRef): + if expr.name not in constants: + # Use the constant name directly as the label for simple references + const_refs.add(expr.name) + if expr.name not in unresolved_consts: + unresolved_consts[expr.name] = ("ref", expr.name) + return expr.name + return constants[expr.name] + + if isinstance(expr, LabelRef): + if expr.name in global_labels: + # Label is already defined, return its value + return global_labels[expr.name] + else: + # Label is unresolved + const_refs.add(expr.name) + if expr.name not in unresolved_consts: + unresolved_consts[expr.name] = ("ref", expr.name) + return expr.name - # Otherwise it might be a plain identifier + if isinstance(expr, str): + # String should not appear in well-formed expressions raise ConstEvalException(f"Invalid constant expression: {expr}") # Handle operations diff --git a/vyper/venom/memory_location.py b/vyper/venom/memory_location.py index 8d36dbe969..aa39346056 100644 --- a/vyper/venom/memory_location.py +++ b/vyper/venom/memory_location.py @@ -5,7 +5,7 @@ from vyper.evm.address_space import MEMORY, STORAGE, TRANSIENT, AddrSpace from vyper.exceptions import CompilerPanic -from vyper.venom.basicblock import IRLabel, IRLiteral, IROperand, IRVariable +from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, IROperand, IRVariable @dataclass(frozen=True) @@ 
-53,6 +53,8 @@ def from_operands( _size = None elif isinstance(size, IRLabel): _size = None + elif isinstance(size, ConstRef): + _size = None # Constant reference - unknown at analysis time elif isinstance(size, int): _size = size else: # pragma: nocover diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index caaf54f846..f2796b3b07 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -4,6 +4,7 @@ from lark import Lark, Transformer from vyper.venom.basicblock import ( + ConstRef, IRBasicBlock, IRHexString, IRInstruction, @@ -11,6 +12,7 @@ IRLiteral, IROperand, IRVariable, + LabelRef, ) from vyper.venom.const_eval import evaluate_const_expr, try_evaluate_const_expr from vyper.venom.context import IRContext @@ -304,6 +306,14 @@ def assignment(self, children) -> IRInstruction: return value if isinstance(value, (IRLiteral, IRVariable, IRLabel)): return IRInstruction("store", [value], output=to) + # Handle typed const/label references + if isinstance(value, (ConstRef, LabelRef)): + # Convert to IRLabel for store instruction + if isinstance(value, LabelRef): + return IRInstruction("store", [IRLabel(value.name)], output=to) + else: + # ConstRef - store as is for evaluation later + return IRInstruction("store", [value], output=to) # type: ignore[list-item] # Handle const expressions that need evaluation if isinstance(value, (str, tuple)): # This will be evaluated later in the function processing @@ -377,12 +387,11 @@ def label_name(self, children) -> str: # label_name can be IDENT or ESCAPED_STRING return _unescape(str(children[0])) - def label_ref(self, children) -> IRLabel: + def label_ref(self, children) -> LabelRef: # label_ref is "@" followed by IDENT or ESCAPED_STRING + # The @ prefix is handled by the grammar, so we only get the label name label = _unescape(str(children[0])) - if label.startswith("@"): - label = label[1:] - return IRLabel(label, True) + return LabelRef(label) def VAR_IDENT(self, var_ident) -> IRVariable: return 
IRVariable(var_ident[1:]) @@ -414,18 +423,12 @@ def const_expr(self, children): def const_atom(self, children): # const_atom: CONST | const_ref | label_ref child = children[0] - if isinstance(child, IRLiteral): - return child - elif isinstance(child, IRLabel): - # Return the IRLabel directly - no @ prefix needed - return child - else: - # Must be a const_ref (string starting with $) - return child + # All three types (IRLiteral, ConstRef, LabelRef) can be returned directly + return child - def const_ref(self, children) -> str: + def const_ref(self, children) -> ConstRef: # const_ref: "$" IDENT - return f"${children[0]}" + return ConstRef(str(children[0])) def const_func(self, children): # const_func: IDENT "(" const_expr ("," const_expr)* ")" diff --git a/vyper/venom/resolve_const.py b/vyper/venom/resolve_const.py index 78f9dcc7bf..394c8d2726 100644 --- a/vyper/venom/resolve_const.py +++ b/vyper/venom/resolve_const.py @@ -1,4 +1,4 @@ -from vyper.venom.basicblock import IRLabel, IRLiteral, IROperand +from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, IROperand, LabelRef from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext @@ -18,7 +18,7 @@ def resolve_const_operands(ctx: IRContext) -> None: for inst in bb.instructions: new_operands = [] for op in inst.operands: - if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): + if isinstance(op, (tuple, ConstRef, LabelRef)) and not isinstance(op, IROperand): # This is a raw const expression - evaluate it result = try_evaluate_const_expr( op, diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 28e5ab5035..b9e5887eb1 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -21,6 +21,7 @@ from vyper.venom.basicblock import ( PSEUDO_INSTRUCTION, TEST_INSTRUCTIONS, + ConstRef, IRBasicBlock, IRHexString, IRInstruction, @@ -28,6 +29,7 @@ IRLiteral, IROperand, IRVariable, + LabelRef, ) from 
vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext, IRFunction @@ -188,14 +190,7 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr # Normalize the expression for comparison def normalize_expr(e): - if isinstance(e, tuple) and len(e) == 3: - op, a1, a2 = e - # Strip @ prefix from label references - if isinstance(a1, str) and a1.startswith("@"): - a1 = a1[1:] - if isinstance(a2, str) and a2.startswith("@"): - a2 = a2[1:] - return (op, a1, a2) + # With typed objects, expressions are already normalized return e normalized_expr = normalize_expr(expr) @@ -241,11 +236,21 @@ def normalize_expr(e): elif isinstance(expr, tuple) and len(expr) == 3: # Binary operation op_name, arg1, arg2 = expr - # Convert IRLabel objects to strings for assembler + # Convert typed objects to strings for assembler if isinstance(arg1, IRLabel): arg1 = arg1.value + elif isinstance(arg1, ConstRef): + arg1 = arg1.name # No prefix for assembler + elif isinstance(arg1, LabelRef): + arg1 = f"@{arg1.name}" # Add @ prefix for assembler + if isinstance(arg2, IRLabel): arg2 = arg2.value + elif isinstance(arg2, ConstRef): + arg2 = arg2.name # No prefix for assembler + elif isinstance(arg2, LabelRef): + arg2 = f"@{arg2.name}" # Add @ prefix for assembler + # Emit the appropriate CONST_* operation if op_name == "add": asm.append(CONST_ADD(label_name, arg1, arg2)) # type: ignore[arg-type] From 571a1820f9a3ffe6cc4c718a4244e1d9e85fd448 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 17 Jul 2025 20:30:08 +0300 Subject: [PATCH 151/172] add handling for LabelRef in VenomTransformer --- vyper/venom/parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index f2796b3b07..129c834f00 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -352,6 +352,8 @@ def instruction(self, children) -> IRInstruction: # We need access to context, so we'll store it as-is for now # 
and process it later during function processing processed_operands.append(op) + elif isinstance(op, LabelRef): + processed_operands.append(IRLabel(op.name)) else: processed_operands.append(op) From 7278d2f93d32f5aa2cd3be38b1c572fe0c30adc5 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 17 Jul 2025 20:50:00 +0300 Subject: [PATCH 152/172] refactor/cleanup --- vyper/evm/assembler/symbols.py | 23 ++++++++--------------- vyper/venom/basicblock.py | 1 - vyper/venom/venom_to_assembly.py | 22 +++++++++------------- 3 files changed, 17 insertions(+), 29 deletions(-) diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py index fc17a3b1ea..fb79ef0714 100644 --- a/vyper/evm/assembler/symbols.py +++ b/vyper/evm/assembler/symbols.py @@ -67,21 +67,14 @@ def __eq__(self, other): def _resolve_operand(self, operand: str | int, symbol_map: dict[SymbolKey, int]) -> int | None: if isinstance(operand, str): - # Handle @ prefix for label references - if operand.startswith("@"): - label_name = operand[1:] - label = Label(label_name) - if label in symbol_map: - return symbol_map[label] - else: - # Try as CONSTREF first - op_ref = CONSTREF(operand) - if op_ref in symbol_map: - return symbol_map[op_ref] - # Try as Label - label = Label(operand) - if label in symbol_map: - return symbol_map[label] + # Try as CONSTREF first + op_ref = CONSTREF(operand) + if op_ref in symbol_map: + return symbol_map[op_ref] + # Try as Label + label = Label(operand) + if label in symbol_map: + return symbol_map[label] elif isinstance(operand, int): return operand return None diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 288d197938..fbe038250d 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -254,7 +254,6 @@ def __str__(self): class LabelRef: """ Reference to a label in Venom IR. - Replaces the @-prefixed string representation. 
""" name: str diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index b9e5887eb1..160ce9fd0f 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -237,19 +237,15 @@ def normalize_expr(e): # Binary operation op_name, arg1, arg2 = expr # Convert typed objects to strings for assembler - if isinstance(arg1, IRLabel): - arg1 = arg1.value - elif isinstance(arg1, ConstRef): - arg1 = arg1.name # No prefix for assembler - elif isinstance(arg1, LabelRef): - arg1 = f"@{arg1.name}" # Add @ prefix for assembler - - if isinstance(arg2, IRLabel): - arg2 = arg2.value - elif isinstance(arg2, ConstRef): - arg2 = arg2.name # No prefix for assembler - elif isinstance(arg2, LabelRef): - arg2 = f"@{arg2.name}" # Add @ prefix for assembler + assert isinstance(arg1, (IRLabel, ConstRef, LabelRef)), ( + f"ConstRef, or LabelRef, got {type(arg1)}" + ) + arg1 = arg1.name + + assert isinstance(arg2, (IRLabel, ConstRef, LabelRef)), ( + f"ConstRef, or LabelRef, got {type(arg2)}" + ) + arg2 = arg2.name # Emit the appropriate CONST_* operation if op_name == "add": From 49f430447412fe0012f23cf3d8d3d688cb639e71 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 17 Jul 2025 23:10:46 +0300 Subject: [PATCH 153/172] wip --- tests/unit/venom/test_const_expressions.py | 12 ++++---- vyper/venom/const_eval.py | 4 +-- vyper/venom/resolve_const.py | 13 ++++++-- vyper/venom/venom_to_assembly.py | 36 ++++++++++++++++------ 4 files changed, 45 insertions(+), 20 deletions(-) diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index e9b38ccf30..ea2d7c2532 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -249,13 +249,13 @@ def test_try_evaluate_undefined_const(): assert len(unresolved_consts) == 0 assert len(const_refs) == 0 - # Test undefined constant - returns label + # Test undefined constant - returns ConstRef object result = 
try_evaluate_const_expr(ConstRef("B"), constants, global_labels, unresolved_consts, const_refs) - assert isinstance(result, str) - assert result == "B" # Now uses the constant name directly + assert isinstance(result, ConstRef) + assert result.name == "B" # The ConstRef has the name assert "B" in const_refs - assert result in unresolved_consts - assert unresolved_consts[result] == ("ref", "B") + assert "B" in unresolved_consts + assert unresolved_consts["B"] == ("ref", "B") def test_try_evaluate_undefined_in_operation(): @@ -278,7 +278,7 @@ def test_try_evaluate_undefined_in_operation(): op_name, arg1, arg2 = unresolved_consts[result] assert op_name == "add" assert arg1 == 10 # A was resolved - assert isinstance(arg2, str) and arg2 == "B" # B is unresolved + assert isinstance(arg2, ConstRef) and arg2.name == "B" # B is unresolved # Operation with both undefined unresolved_consts.clear() diff --git a/vyper/venom/const_eval.py b/vyper/venom/const_eval.py index 594b394fb4..657fd49169 100644 --- a/vyper/venom/const_eval.py +++ b/vyper/venom/const_eval.py @@ -119,7 +119,7 @@ def try_evaluate_const_expr( const_refs.add(expr.name) if expr.name not in unresolved_consts: unresolved_consts[expr.name] = ("ref", expr.name) - return expr.name + return expr return constants[expr.name] if isinstance(expr, LabelRef): @@ -131,7 +131,7 @@ def try_evaluate_const_expr( const_refs.add(expr.name) if expr.name not in unresolved_consts: unresolved_consts[expr.name] = ("ref", expr.name) - return expr.name + return expr if isinstance(expr, str): # String should not appear in well-formed expressions diff --git a/vyper/venom/resolve_const.py b/vyper/venom/resolve_const.py index 394c8d2726..f6f4e57d75 100644 --- a/vyper/venom/resolve_const.py +++ b/vyper/venom/resolve_const.py @@ -1,3 +1,4 @@ +from vyper.exceptions import CompilerPanic from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, IROperand, LabelRef from vyper.venom.const_eval import try_evaluate_const_expr from 
vyper.venom.context import IRContext @@ -29,9 +30,17 @@ def resolve_const_operands(ctx: IRContext) -> None: ) if isinstance(result, int): new_operands.append(IRLiteral(result)) - else: - # Return as label for unresolved expressions + elif isinstance(result, ConstRef): + # Convert unresolved ConstRef to IRLabel + new_operands.append(IRLabel(result.name, True)) + elif isinstance(result, LabelRef): + # Convert unresolved LabelRef to IRLabel + new_operands.append(IRLabel(result.name, True)) + elif isinstance(result, str): + # String result from unresolved expressions new_operands.append(IRLabel(result, True)) + else: + raise CompilerPanic(f"Unexpected result type from const eval: {type(result)} {result}") else: new_operands.append(op) inst.operands = new_operands diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 160ce9fd0f..0a6a886e0f 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -237,15 +237,23 @@ def normalize_expr(e): # Binary operation op_name, arg1, arg2 = expr # Convert typed objects to strings for assembler - assert isinstance(arg1, (IRLabel, ConstRef, LabelRef)), ( - f"ConstRef, or LabelRef, got {type(arg1)}" - ) - arg1 = arg1.name - - assert isinstance(arg2, (IRLabel, ConstRef, LabelRef)), ( - f"ConstRef, or LabelRef, got {type(arg2)}" - ) - arg2 = arg2.name + if isinstance(arg1, ConstRef): + arg1 = arg1.name + elif isinstance(arg1, LabelRef): + arg1 = arg1.name + elif isinstance(arg1, IRLabel): + arg1 = arg1.value + elif not isinstance(arg1, (int, str)): + raise CompilerPanic(f"Unexpected arg1 type: {type(arg1)} {arg1}") + + if isinstance(arg2, ConstRef): + arg2 = arg2.name + elif isinstance(arg2, LabelRef): + arg2 = arg2.name + elif isinstance(arg2, IRLabel): + arg2 = arg2.value + elif not isinstance(arg2, (int, str)): + raise CompilerPanic(f"Unexpected arg2 type: {type(arg2)} {arg2}") # Emit the appropriate CONST_* operation if op_name == "add": @@ -262,7 +270,15 @@ def 
normalize_expr(e): for _label_name, expr in self.ctx.unresolved_consts.items(): if isinstance(expr, tuple) and len(expr) == 3: _, arg1, arg2 = expr - if arg1 == bb.label.value or arg2 == bb.label.value: + # Extract label names from typed objects + label1 = None + label2 = None + if isinstance(arg1, LabelRef): + label1 = arg1.name + if isinstance(arg2, LabelRef): + label2 = arg2.name + + if label1 == bb.label.value or label2 == bb.label.value: bb.is_pinned = True for fn in self.ctx.functions.values(): From 9111c8a282ccdff094f8f2f05b6d6344520d6e4a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 17 Jul 2025 23:25:55 +0300 Subject: [PATCH 154/172] Add `UnresolvedConst` --- tests/unit/venom/test_const_expressions.py | 14 +++++++------- vyper/venom/basicblock.py | 12 ++++++++++++ vyper/venom/const_eval.py | 8 ++++---- vyper/venom/parser.py | 7 +++++-- vyper/venom/resolve_const.py | 8 ++++---- vyper/venom/venom_to_assembly.py | 1 + 6 files changed, 33 insertions(+), 17 deletions(-) diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index ea2d7c2532..92017edef7 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -2,7 +2,7 @@ from vyper.evm.assembler.core import assembly_to_evm from vyper.evm.assembler.symbols import CONST, CONST_ADD, Label -from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, LabelRef +from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, LabelRef, UnresolvedConst from vyper.venom.const_eval import ConstEvalException, evaluate_const_expr, try_evaluate_const_expr from vyper.venom.parser import parse_venom from vyper.venom.resolve_const import resolve_const_operands @@ -269,13 +269,13 @@ def test_try_evaluate_undefined_in_operation(): result = try_evaluate_const_expr( ("add", ConstRef("A"), ConstRef("B")), constants, global_labels, unresolved_consts, const_refs ) - assert isinstance(result, str) - assert 
result.startswith("__const_") # Complex expressions still get generated names + assert isinstance(result, UnresolvedConst) + assert result.name.startswith("__const_") # Complex expressions still get generated names assert "B" in const_refs # Check that the unresolved expression is stored correctly - assert result in unresolved_consts - op_name, arg1, arg2 = unresolved_consts[result] + assert result.name in unresolved_consts + op_name, arg1, arg2 = unresolved_consts[result.name] assert op_name == "add" assert arg1 == 10 # A was resolved assert isinstance(arg2, ConstRef) and arg2.name == "B" # B is unresolved @@ -286,8 +286,8 @@ def test_try_evaluate_undefined_in_operation(): result = try_evaluate_const_expr( ("mul", ConstRef("B"), ConstRef("C")), constants, global_labels, unresolved_consts, const_refs ) - assert isinstance(result, str) - assert result.startswith("__const_") + assert isinstance(result, UnresolvedConst) + assert result.name.startswith("__const_") assert "B" in const_refs assert "C" in const_refs diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index fbe038250d..3f5e9fd838 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -261,6 +261,18 @@ def __str__(self): return f"@{self.name}" +@dataclass(frozen=True) +class UnresolvedConst: + """ + Represents an unresolved constant expression in Venom IR. + Used when a complex expression cannot be evaluated at parse time. + """ + name: str + + def __str__(self): + return f"${self.name}" + + class IRInstruction: """ IRInstruction represents an instruction in IR. 
Each instruction has an opcode, diff --git a/vyper/venom/const_eval.py b/vyper/venom/const_eval.py index 657fd49169..c23855240c 100644 --- a/vyper/venom/const_eval.py +++ b/vyper/venom/const_eval.py @@ -6,7 +6,7 @@ from typing import Any, Union from vyper.exceptions import CompilerPanic -from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, LabelRef +from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, LabelRef, UnresolvedConst class ConstEvalException(CompilerPanic): @@ -92,7 +92,7 @@ def try_evaluate_const_expr( global_labels: dict[str, int], unresolved_consts: dict[str, Any], const_refs: set[str], -) -> Union[int, str]: +) -> Union[int, ConstRef, LabelRef, UnresolvedConst]: # Handle simple cases first if isinstance(expr, int): return expr @@ -111,7 +111,7 @@ def try_evaluate_const_expr( const_refs.add(label_name) if label_name not in unresolved_consts: unresolved_consts[label_name] = ("ref", label_name) - return label_name + return LabelRef(label_name) if isinstance(expr, ConstRef): if expr.name not in constants: @@ -176,6 +176,6 @@ def try_evaluate_const_expr( # Otherwise, create a label for this unresolved expression label = generate_const_label_name() unresolved_consts[label] = (op_name, val1, val2) - return label + return UnresolvedConst(label) raise ConstEvalException(f"Invalid constant expression format: {expr}") diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 129c834f00..ccea621df9 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -13,6 +13,7 @@ IROperand, IRVariable, LabelRef, + UnresolvedConst, ) from vyper.venom.const_eval import evaluate_const_expr, try_evaluate_const_expr from vyper.venom.context import IRContext @@ -256,9 +257,11 @@ def _try_evaluate_const_expr(self, expr, ctx: IRContext) -> IROperand: ) if isinstance(result, int): return IRLiteral(result) + elif isinstance(result, (ConstRef, LabelRef, UnresolvedConst)): + # Extract the name from typed objects for IRLabel + return 
IRLabel(result.name, True) else: - # result is a label name for unresolved constant - return IRLabel(result, True) + raise ValueError(f"Unexpected result type from try_evaluate_const_expr: {type(result)}") def const_def(self, children) -> _ConstDef: # Filter out NEWLINE tokens diff --git a/vyper/venom/resolve_const.py b/vyper/venom/resolve_const.py index f6f4e57d75..a4bb31752d 100644 --- a/vyper/venom/resolve_const.py +++ b/vyper/venom/resolve_const.py @@ -1,5 +1,5 @@ from vyper.exceptions import CompilerPanic -from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, IROperand, LabelRef +from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, IROperand, LabelRef, UnresolvedConst from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext @@ -36,9 +36,9 @@ def resolve_const_operands(ctx: IRContext) -> None: elif isinstance(result, LabelRef): # Convert unresolved LabelRef to IRLabel new_operands.append(IRLabel(result.name, True)) - elif isinstance(result, str): - # String result from unresolved expressions - new_operands.append(IRLabel(result, True)) + elif isinstance(result, UnresolvedConst): + # Convert unresolved const expression to IRLabel + new_operands.append(IRLabel(result.name, True)) else: raise CompilerPanic(f"Unexpected result type from const eval: {type(result)} {result}") else: diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 0a6a886e0f..eefe4aa40a 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -30,6 +30,7 @@ IROperand, IRVariable, LabelRef, + UnresolvedConst, ) from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext, IRFunction From f6d07ed8768ded670cd21d315981df87a0a9d718 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 00:10:33 +0300 Subject: [PATCH 155/172] lint --- .../compiler/venom/test_venom_to_assembly.py | 2 +- 
tests/unit/venom/test_const_expressions.py | 32 +++++++++++++++---- vyper/evm/assembler/core.py | 16 ++++++---- vyper/venom/basicblock.py | 8 ++++- vyper/venom/resolve_const.py | 19 ++++++++--- vyper/venom/venom_to_assembly.py | 28 +++++++++------- 6 files changed, 74 insertions(+), 31 deletions(-) diff --git a/tests/unit/compiler/venom/test_venom_to_assembly.py b/tests/unit/compiler/venom/test_venom_to_assembly.py index edf4bda972..a62b7e3e00 100644 --- a/tests/unit/compiler/venom/test_venom_to_assembly.py +++ b/tests/unit/compiler/venom/test_venom_to_assembly.py @@ -1,4 +1,4 @@ -from vyper.evm.assembler.core import PUSHLABEL, PUSHSYMBOL, Label +from vyper.evm.assembler.core import PUSHLABEL, Label from vyper.venom.parser import parse_venom from vyper.venom.venom_to_assembly import VenomCompiler diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index 92017edef7..5b685f66cc 100644 --- a/tests/unit/venom/test_const_expressions.py +++ b/tests/unit/venom/test_const_expressions.py @@ -34,12 +34,18 @@ def test_basic_const_eval(): assert evaluate_const_expr(("min", 10, 20), constants, global_labels) == 10 # Test operations with references - assert evaluate_const_expr(("add", ConstRef("A"), ConstRef("B")), constants, global_labels) == 30 - assert evaluate_const_expr(("add", LabelRef("label1"), 0x100), constants, global_labels) == 0x200 + assert ( + evaluate_const_expr(("add", ConstRef("A"), ConstRef("B")), constants, global_labels) == 30 + ) + assert ( + evaluate_const_expr(("add", LabelRef("label1"), 0x100), constants, global_labels) == 0x200 + ) # Test nested operations assert evaluate_const_expr(("add", ("mul", 2, 3), 4), constants, global_labels) == 10 - assert evaluate_const_expr(("mul", ("add", ConstRef("A"), 5), 2), constants, global_labels) == 30 + assert ( + evaluate_const_expr(("mul", ("add", ConstRef("A"), 5), 2), constants, global_labels) == 30 + ) def test_const_eval_errors(): @@ -244,13 +250,17 @@ def 
test_try_evaluate_undefined_const(): const_refs = set() # Test defined constant - returns value - result = try_evaluate_const_expr(ConstRef("A"), constants, global_labels, unresolved_consts, const_refs) + result = try_evaluate_const_expr( + ConstRef("A"), constants, global_labels, unresolved_consts, const_refs + ) assert result == 10 assert len(unresolved_consts) == 0 assert len(const_refs) == 0 # Test undefined constant - returns ConstRef object - result = try_evaluate_const_expr(ConstRef("B"), constants, global_labels, unresolved_consts, const_refs) + result = try_evaluate_const_expr( + ConstRef("B"), constants, global_labels, unresolved_consts, const_refs + ) assert isinstance(result, ConstRef) assert result.name == "B" # The ConstRef has the name assert "B" in const_refs @@ -267,7 +277,11 @@ def test_try_evaluate_undefined_in_operation(): # Operation with one undefined constant result = try_evaluate_const_expr( - ("add", ConstRef("A"), ConstRef("B")), constants, global_labels, unresolved_consts, const_refs + ("add", ConstRef("A"), ConstRef("B")), + constants, + global_labels, + unresolved_consts, + const_refs, ) assert isinstance(result, UnresolvedConst) assert result.name.startswith("__const_") # Complex expressions still get generated names @@ -284,7 +298,11 @@ def test_try_evaluate_undefined_in_operation(): unresolved_consts.clear() const_refs.clear() result = try_evaluate_const_expr( - ("mul", ConstRef("B"), ConstRef("C")), constants, global_labels, unresolved_consts, const_refs + ("mul", ConstRef("B"), ConstRef("C")), + constants, + global_labels, + unresolved_consts, + const_refs, ) assert isinstance(result, UnresolvedConst) assert result.name.startswith("__const_") diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index bd632eff0e..e5928fd048 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -92,6 +92,7 @@ class PUSHSYMBOL: Universal symbol reference instruction that can handle any symbol type. 
The assembler will resolve whether it's a label, constant, or const reference. """ + def __init__(self, symbol_name: str): assert isinstance(symbol_name, str) self.symbol_name = symbol_name @@ -402,7 +403,7 @@ def resolve_symbols( elif isinstance(item, PUSHSYMBOL): # Determine symbol type and appropriate size symbol_name = item.symbol_name - + # Strip $ prefix if present for const references if symbol_name.startswith("$"): const_name = symbol_name[1:] @@ -442,7 +443,7 @@ def resolve_symbols( pc += calc_push_size(val) else: # Treat it as a label-dependent reference using PUSH2 size - pc += SYMBOL_SIZE + 1 # PUSH2 + pc += SYMBOL_SIZE + 1 # PUSH2 else: # pragma: nocover raise CompilerPanic(f"invalid ofst {item.label}") @@ -665,7 +666,7 @@ def _assembly_to_evm( elif isinstance(item, PUSHSYMBOL): # Resolve the symbol and push appropriate value symbol_name = item.symbol_name - + # Handle const references (starting with $) if symbol_name.startswith("$"): const_name = symbol_name[1:] @@ -702,9 +703,12 @@ def _assembly_to_evm( bytecode = _compile_push_instruction(PUSH(val)) ret.extend(bytecode) else: - # Symbol not found. This will raise KeyError with the actual undefined symbol. + # Symbol not found. This will raise KeyError with the + # actual undefined symbol. # This is the same behavior as PUSHLABEL. 
- bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) + bytecode = _compile_push_instruction( + PUSH_N(symbol_map[label], n=SYMBOL_SIZE) + ) ret.extend(bytecode) elif isinstance(item, JUMPDEST): @@ -721,7 +725,7 @@ def _assembly_to_evm( else: assert isinstance(item.label, CONSTREF) const_name = item.label.label - + # Try to look up as a CONSTREF first if item.label in symbol_map: ofst = symbol_map[item.label] + item.ofst diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 3f5e9fd838..3ea67446e5 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -221,7 +221,6 @@ class IRLabel(IROperand): value: str address: Optional[int] = None # optional address override - def __init__(self, value: str, is_symbol: bool = False, address: Optional[int] = None) -> None: assert isinstance(value, str), f"not a str: {value} ({type(value)})" assert len(value) > 0 @@ -244,6 +243,7 @@ class ConstRef: Reference to a named constant in Venom IR. Replaces the $-prefixed string representation. """ + name: str def __str__(self): @@ -255,6 +255,7 @@ class LabelRef: """ Reference to a label in Venom IR. """ + name: str def __str__(self): @@ -267,6 +268,7 @@ class UnresolvedConst: Represents an unresolved constant expression in Venom IR. Used when a complex expression cannot be evaluated at parse time. 
""" + name: str def __str__(self): @@ -469,6 +471,7 @@ def str_short(self) -> str: operands = self.operands if opcode not in ["jmp", "jnz", "djmp", "invoke"]: operands = list(reversed(operands)) + def format_operand(op): if isinstance(op, IRLabel): return f"@{op}" @@ -476,6 +479,7 @@ def format_operand(op): return str(op) # Uses their __str__ methods which add prefixes else: return str(op) + s += ", ".join([format_operand(op) for op in operands]) return s @@ -490,6 +494,7 @@ def __repr__(self) -> str: operands = [operands[0]] + list(reversed(operands[1:])) elif self.opcode not in ("jmp", "jnz", "djmp", "phi"): operands = reversed(operands) # type: ignore + def format_operand(op): if isinstance(op, IRLabel): return f"@{op}" @@ -497,6 +502,7 @@ def format_operand(op): return str(op) # Uses their __str__ methods which add prefixes else: return str(op) + s += ", ".join([format_operand(op) for op in operands]) if self.annotation: diff --git a/vyper/venom/resolve_const.py b/vyper/venom/resolve_const.py index a4bb31752d..1551b6bcc4 100644 --- a/vyper/venom/resolve_const.py +++ b/vyper/venom/resolve_const.py @@ -1,5 +1,12 @@ from vyper.exceptions import CompilerPanic -from vyper.venom.basicblock import ConstRef, IRLabel, IRLiteral, IROperand, LabelRef, UnresolvedConst +from vyper.venom.basicblock import ( + ConstRef, + IRLabel, + IRLiteral, + IROperand, + LabelRef, + UnresolvedConst, +) from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext @@ -19,7 +26,9 @@ def resolve_const_operands(ctx: IRContext) -> None: for inst in bb.instructions: new_operands = [] for op in inst.operands: - if isinstance(op, (tuple, ConstRef, LabelRef)) and not isinstance(op, IROperand): + if isinstance(op, (tuple, ConstRef, LabelRef)) and not isinstance( + op, IROperand + ): # This is a raw const expression - evaluate it result = try_evaluate_const_expr( op, @@ -34,13 +43,15 @@ def resolve_const_operands(ctx: IRContext) -> None: # Convert unresolved 
ConstRef to IRLabel new_operands.append(IRLabel(result.name, True)) elif isinstance(result, LabelRef): - # Convert unresolved LabelRef to IRLabel + # Convert unresolved LabelRef to IRLabel new_operands.append(IRLabel(result.name, True)) elif isinstance(result, UnresolvedConst): # Convert unresolved const expression to IRLabel new_operands.append(IRLabel(result.name, True)) else: - raise CompilerPanic(f"Unexpected result type from const eval: {type(result)} {result}") + raise CompilerPanic( + f"Unexpected result type from const eval: {type(result)} {result}" + ) else: new_operands.append(op) inst.operands = new_operands diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index eefe4aa40a..d6ec541fbc 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -30,7 +30,6 @@ IROperand, IRVariable, LabelRef, - UnresolvedConst, ) from vyper.venom.const_eval import try_evaluate_const_expr from vyper.venom.context import IRContext, IRFunction @@ -188,22 +187,25 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr # Check if try_evaluate_const_expr already added this to unresolved_consts # under a different name (e.g., __const_0). If so, update it to use our name. 
found_existing = False - + # Normalize the expression for comparison def normalize_expr(e): # With typed objects, expressions are already normalized return e - + normalized_expr = normalize_expr(expr) - + for existing_name, existing_expr in list(self.ctx.unresolved_consts.items()): - if existing_name.startswith("__const_") and normalize_expr(existing_expr) == normalized_expr: + if ( + existing_name.startswith("__const_") + and normalize_expr(existing_expr) == normalized_expr + ): # Remove the auto-generated name and use our explicit name del self.ctx.unresolved_consts[existing_name] self.ctx.unresolved_consts[name] = existing_expr found_existing = True break - + if not found_existing: # Store as unresolved constant self.ctx.unresolved_consts[name] = expr @@ -246,7 +248,7 @@ def normalize_expr(e): arg1 = arg1.value elif not isinstance(arg1, (int, str)): raise CompilerPanic(f"Unexpected arg1 type: {type(arg1)} {arg1}") - + if isinstance(arg2, ConstRef): arg2 = arg2.name elif isinstance(arg2, LabelRef): @@ -255,7 +257,7 @@ def normalize_expr(e): arg2 = arg2.value elif not isinstance(arg2, (int, str)): raise CompilerPanic(f"Unexpected arg2 type: {type(arg2)} {arg2}") - + # Emit the appropriate CONST_* operation if op_name == "add": asm.append(CONST_ADD(label_name, arg1, arg2)) # type: ignore[arg-type] @@ -278,7 +280,7 @@ def normalize_expr(e): label1 = arg1.name if isinstance(arg2, LabelRef): label2 = arg2.name - + if label1 == bb.label.value or label2 == bb.label.value: bb.is_pinned = True @@ -361,9 +363,11 @@ def _emit_input_operands( # to emit it here but we need to add it to the stack map if inst.opcode != "invoke": # Check if this label is a constant reference - if (op.value in self.ctx.unresolved_consts or - op.value in self.ctx.constants or - op.value in self.ctx.const_expressions): + if ( + op.value in self.ctx.unresolved_consts + or op.value in self.ctx.constants + or op.value in self.ctx.const_expressions + ): # For all constants, use PUSH_OFST with CONSTREF 
# This ensures consistent handling whether they're simple refs or # expressions From 3bdeb090ee3374b309cc0231a8ba04760540e71f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 10:35:30 +0300 Subject: [PATCH 156/172] parser cleanup --- vyper/venom/parser.py | 82 ++++++++++++------------------------------- 1 file changed, 23 insertions(+), 59 deletions(-) diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index ccea621df9..2ae28a8567 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -1,5 +1,5 @@ import json -from typing import Optional +from typing import Any, Optional, Union from lark import Lark, Transformer @@ -13,9 +13,7 @@ IROperand, IRVariable, LabelRef, - UnresolvedConst, ) -from vyper.venom.const_eval import evaluate_const_expr, try_evaluate_const_expr from vyper.venom.context import IRContext from vyper.venom.function import IRFunction @@ -132,15 +130,22 @@ class _ConstDef(_TypedItem): class _LabelDecl: """Represents a block declaration in the parse tree.""" - def __init__( - self, label: str, address: Optional[int] = None, tags: Optional[list[str]] = None - ) -> None: + def __init__(self, label: str, tags: Optional[list[str]] = None) -> None: self.label = label - self.address = address self.tags = tags or [] class VenomTransformer(Transformer): + def _filter_newlines(self, children: list) -> list: + """Filter out NEWLINE tokens from children list.""" + return [c for c in children if not (hasattr(c, "type") and c.type == "NEWLINE")] + + def _get_token_value(self, token) -> str: + """Extract value from a Lark token or return str representation.""" + if hasattr(token, "value"): + return token.value + return str(token) + def start(self, children) -> IRContext: ctx = IRContext() @@ -188,24 +193,17 @@ def start(self, children) -> IRContext: # label starts a new block that contains all instructions until # the next label or end of function. 
current_block_label: Optional[str] = None - current_block_address: Optional[int] = None current_block_tags: list[str] = [] current_block_instructions: list[IRInstruction] = [] - blocks: list[tuple[str, Optional[int], list[IRInstruction], list[str]]] = [] + blocks: list[tuple[str, list[IRInstruction], list[str]]] = [] for item in items: if isinstance(item, _LabelDecl): if current_block_label is not None: blocks.append( - ( - current_block_label, - current_block_address, - current_block_instructions, - current_block_tags, - ) + (current_block_label, current_block_instructions, current_block_tags) ) current_block_label = item.label - current_block_address = item.address # Will always be None now current_block_tags = item.tags current_block_instructions = [] elif isinstance(item, IRInstruction): @@ -214,18 +212,11 @@ def start(self, children) -> IRContext: current_block_instructions.append(item) if current_block_label is not None: - blocks.append( - ( - current_block_label, - current_block_address, - current_block_instructions, - current_block_tags, - ) - ) + blocks.append((current_block_label, current_block_instructions, current_block_tags)) for block_data in blocks: - # All blocks now have: (block_name, address, instructions, tags) - block_name, _address, instructions, tags = block_data + # All blocks now have: (block_name, instructions, tags) + block_name, instructions, tags = block_data bb = IRBasicBlock(IRLabel(block_name, True), fn) # Set is_volatile if "pinned" tag is present @@ -244,34 +235,13 @@ def start(self, children) -> IRContext: return ctx - def _evaluate_const_expr( - self, expr, constants: dict[str, int], global_labels: dict[str, int] - ) -> int: - """Helper method to evaluate const expressions.""" - return evaluate_const_expr(expr, constants, global_labels) - - def _try_evaluate_const_expr(self, expr, ctx: IRContext) -> IROperand: - """Try to evaluate const expression, returning IRLabel for unresolved parts.""" - result = try_evaluate_const_expr( - 
expr, ctx.constants, ctx.global_labels, ctx.unresolved_consts, ctx.const_refs - ) - if isinstance(result, int): - return IRLiteral(result) - elif isinstance(result, (ConstRef, LabelRef, UnresolvedConst)): - # Extract the name from typed objects for IRLabel - return IRLabel(result.name, True) - else: - raise ValueError(f"Unexpected result type from try_evaluate_const_expr: {type(result)}") - def const_def(self, children) -> _ConstDef: - # Filter out NEWLINE tokens - filtered = [c for c in children if not (hasattr(c, "type") and c.type == "NEWLINE")] + filtered = self._filter_newlines(children) name, expr = filtered return _ConstDef([str(name), expr]) def global_label(self, children) -> _GlobalLabel: - # Filter out NEWLINE tokens - filtered = [c for c in children if not (hasattr(c, "type") and c.type == "NEWLINE")] + filtered = self._filter_newlines(children) name, expr = filtered return _GlobalLabel([name, expr]) @@ -296,7 +266,7 @@ def label_decl(self, children) -> _LabelDecl: elif isinstance(child, list): # tag_list returns a list tags = child - return _LabelDecl(label, None, tags) + return _LabelDecl(label, tags) def statement(self, children) -> IRInstruction: # children[0] is the instruction/assignment, rest are NEWLINE tokens @@ -332,23 +302,17 @@ def expr(self, children) -> IRInstruction | IROperand: def instruction(self, children) -> IRInstruction: if len(children) == 1: # just the opcode (IDENT) - opcode = str(children[0]) - # Handle Lark tokens - if hasattr(children[0], "value"): - opcode = children[0].value + opcode = self._get_token_value(children[0]) operands = [] elif len(children) == 2: # Two cases: IDENT + operands_list OR "db" + operands_list - opcode = str(children[0]) - # Handle Lark tokens - if hasattr(children[0], "value"): - opcode = children[0].value + opcode = self._get_token_value(children[0]) operands = children[1] else: raise ValueError(f"Unexpected instruction children: {children}") # Process operands - evaluate const expressions if needed 
- processed_operands = [] + processed_operands: list[Union[str, tuple[Any, ...], IROperand]] = [] for op in operands: if isinstance(op, (str, tuple)) and not isinstance(op, IROperand): # This is a const expression that needs evaluation From 124f97baefc57c068821826f2fa2c6236543a4c7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 11:02:38 +0300 Subject: [PATCH 157/172] venom_to_assembly cleanup --- vyper/venom/venom_to_assembly.py | 70 +++++++++++--------------------- 1 file changed, 24 insertions(+), 46 deletions(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index d6ec541fbc..e507b5d72a 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Iterable +from typing import Any, Iterable, Union from vyper.evm.assembler.core import ( DATA_ITEM, @@ -59,7 +59,6 @@ "number", "extcodesize", "extcodehash", - "codecopy", "extcodecopy", "returndatasize", "returndatacopy", @@ -148,7 +147,6 @@ def _ofst(label: Label, value: int) -> list[Any]: # with the assembler. My suggestion is to let this be for now, and we can # refactor it later when we are finished phasing out the old IR. 
class VenomCompiler: - ctxs: list[IRContext] label_counter = 0 visited_basicblocks: OrderedSet # {IRBasicBlock} liveness: LivenessAnalysis @@ -156,11 +154,23 @@ class VenomCompiler: cfg: CFGAnalysis def __init__(self, ctx: IRContext): - # TODO: maybe just accept a single IRContext self.ctx = ctx self.label_counter = 0 self.visited_basicblocks = OrderedSet() + def _extract_label_name(self, obj: Any) -> Union[int, str]: + """Extract label name from typed objects or return as-is for int/str.""" + if isinstance(obj, ConstRef): + return obj.name + elif isinstance(obj, LabelRef): + return obj.name + elif isinstance(obj, IRLabel): + return obj.value + elif isinstance(obj, (int, str)): + return obj + else: + raise CompilerPanic(f"Unexpected type: {type(obj)} {obj}") + def mklabel(self, name: str) -> Label: self.label_counter += 1 return Label(f"{name}_{self.label_counter}") @@ -188,18 +198,8 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr # under a different name (e.g., __const_0). If so, update it to use our name. 
found_existing = False - # Normalize the expression for comparison - def normalize_expr(e): - # With typed objects, expressions are already normalized - return e - - normalized_expr = normalize_expr(expr) - for existing_name, existing_expr in list(self.ctx.unresolved_consts.items()): - if ( - existing_name.startswith("__const_") - and normalize_expr(existing_expr) == normalized_expr - ): + if existing_name.startswith("__const_") and existing_expr == expr: # Remove the auto-generated name and use our explicit name del self.ctx.unresolved_consts[existing_name] self.ctx.unresolved_consts[name] = existing_expr @@ -240,23 +240,8 @@ def normalize_expr(e): # Binary operation op_name, arg1, arg2 = expr # Convert typed objects to strings for assembler - if isinstance(arg1, ConstRef): - arg1 = arg1.name - elif isinstance(arg1, LabelRef): - arg1 = arg1.name - elif isinstance(arg1, IRLabel): - arg1 = arg1.value - elif not isinstance(arg1, (int, str)): - raise CompilerPanic(f"Unexpected arg1 type: {type(arg1)} {arg1}") - - if isinstance(arg2, ConstRef): - arg2 = arg2.name - elif isinstance(arg2, LabelRef): - arg2 = arg2.name - elif isinstance(arg2, IRLabel): - arg2 = arg2.value - elif not isinstance(arg2, (int, str)): - raise CompilerPanic(f"Unexpected arg2 type: {type(arg2)} {arg2}") + arg1 = self._extract_label_name(arg1) + arg2 = self._extract_label_name(arg2) # Emit the appropriate CONST_* operation if op_name == "add": @@ -274,12 +259,13 @@ def normalize_expr(e): if isinstance(expr, tuple) and len(expr) == 3: _, arg1, arg2 = expr # Extract label names from typed objects - label1 = None - label2 = None - if isinstance(arg1, LabelRef): - label1 = arg1.name - if isinstance(arg2, LabelRef): - label2 = arg2.name + + label1 = ( + self._extract_label_name(arg1) if not isinstance(arg1, int) else None + ) + label2 = ( + self._extract_label_name(arg2) if not isinstance(arg2, int) else None + ) if label1 == bb.label.value or label2 == bb.label.value: bb.is_pinned = True @@ -644,10 
+630,6 @@ def _generate_evm_for_instruction( if_nonzero_label, if_zero_label = inst.get_label_operands() assembly.append(PUSHLABELJUMPDEST(_as_asm_symbol(if_nonzero_label))) assembly.append("JUMPI") - - # make sure the if_zero_label will be optimized out - # assert if_zero_label == next(iter(inst.parent.cfg_out)).label - assembly.append(PUSHLABELJUMPDEST(_as_asm_symbol(if_zero_label))) assembly.append("JUMP") @@ -746,10 +728,6 @@ def _optimistic_swap(self, assembly, inst, next_liveness, stack): if DEBUG_SHOW_COST: stack0 = stack.copy() - # Handle empty liveness set - # if not next_liveness: - # return - next_scheduled = next_liveness.last() cost = 0 if not self.dfg.are_equivalent(inst.output, next_scheduled): From 9d32eeae76be613e79bba4b4663ce2f37e7b6391 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 11:29:40 +0300 Subject: [PATCH 158/172] cleanup --- vyper/evm/assembler/core.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index e5928fd048..ad88286c25 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -170,10 +170,6 @@ def is_symbol(i): return isinstance(i, Label) -def is_ofst(assembly_item): - return isinstance(assembly_item, PUSH_OFST) - - AssemblyInstruction = ( str | TaggedInstruction @@ -248,13 +244,14 @@ def _resolve_constants( continue # Check if this constant depends on other label-dependent constants + depends_on_label = False for operand in [item.op1, item.op2]: if isinstance(operand, str) and operand in label_dependent_consts: label_dependent_consts.add(item.name) - continue # Skip this constant too + depends_on_label = True + break - # Skip if we already know it's label-dependent - if item.name in label_dependent_consts: + if depends_on_label: continue # Calculate the value if possible From 192c757be2ad97928fb162da13ef985a9d0961be Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 14:50:59 +0300 
Subject: [PATCH 159/172] handle empty liveness --- vyper/venom/venom_to_assembly.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index e507b5d72a..bc823b6c1c 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -728,6 +728,9 @@ def _optimistic_swap(self, assembly, inst, next_liveness, stack): if DEBUG_SHOW_COST: stack0 = stack.copy() + if not next_liveness: + return + next_scheduled = next_liveness.last() cost = 0 if not self.dfg.are_equivalent(inst.output, next_scheduled): From 4dc29f4640a17d52df493e5fe85b59a2e27a17ca Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:09:52 +0300 Subject: [PATCH 160/172] replace leftour `store` opcode with `assign` in tests --- tests/functional/venom/parser/test_parsing.py | 4 ++-- tests/unit/venom/test_const_expressions.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index b8bf075e37..34f1863bd6 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -382,8 +382,8 @@ def test_global_vars(): expected_ctx = IRContext() expected_ctx.add_function(main_fn := IRFunction(IRLabel("main"))) main_bb = main_fn.get_basic_block("main") - main_bb.append_instruction("store", IRLiteral(1), ret=IRVariable("1")) - main_bb.append_instruction("store", IRLiteral(2), ret=IRVariable("2")) + main_bb.append_instruction("assign", IRLiteral(1), ret=IRVariable("1")) + main_bb.append_instruction("assign", IRLiteral(2), ret=IRVariable("2")) main_bb.append_instruction("add", IRVariable("2"), IRVariable("1"), ret=IRVariable("3")) assert_ctx_eq(ctx, expected_ctx) diff --git a/tests/unit/venom/test_const_expressions.py b/tests/unit/venom/test_const_expressions.py index 5b685f66cc..c45d0d822d 100644 --- a/tests/unit/venom/test_const_expressions.py 
+++ b/tests/unit/venom/test_const_expressions.py @@ -139,11 +139,11 @@ def test_venom_label_addresses(): instructions = bb.instructions # Labels in instructions should be IRLabel objects - assert instructions[0].opcode == "store" + assert instructions[0].opcode == "assign" assert isinstance(instructions[0].operands[0], IRLabel) assert instructions[0].operands[0].value == "data_label" - assert instructions[1].opcode == "store" + assert instructions[1].opcode == "assign" assert isinstance(instructions[1].operands[0], IRLabel) assert instructions[1].operands[0].value == "computed_label" @@ -178,16 +178,16 @@ def test_venom_instruction_operands(): instructions = bb.instructions # Check store instructions have evaluated operands - assert instructions[0].opcode == "store" + assert instructions[0].opcode == "assign" assert instructions[0].operands[0].value == 32 - assert instructions[1].opcode == "store" + assert instructions[1].opcode == "assign" assert instructions[1].operands[0].value == 128 - assert instructions[2].opcode == "store" + assert instructions[2].opcode == "assign" assert instructions[2].operands[0].value == 0x2010 - assert instructions[3].opcode == "store" + assert instructions[3].opcode == "assign" assert instructions[3].operands[0].value == 64 @@ -341,7 +341,7 @@ def test_venom_with_undefined_constants(): instructions = bb.instructions # First instruction should have resolved value - assert instructions[0].opcode == "store" + assert instructions[0].opcode == "assign" assert isinstance(instructions[0].operands[0], IRLiteral) assert instructions[0].operands[0].value == 100 From d7a61a4f49dc539449d74a4ce1798a194052d3e3 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:24:14 +0300 Subject: [PATCH 161/172] remove PUSHSYMBOL class and its uses --- vyper/evm/assembler/core.py | 93 ------------------------------------- 1 file changed, 93 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 
ad88286c25..daf0bd6aec 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -86,29 +86,6 @@ def __eq__(self, other): def __hash__(self): return hash((self.label, self.ofst)) - -class PUSHSYMBOL: - """ - Universal symbol reference instruction that can handle any symbol type. - The assembler will resolve whether it's a label, constant, or const reference. - """ - - def __init__(self, symbol_name: str): - assert isinstance(symbol_name, str) - self.symbol_name = symbol_name - - def __repr__(self): - return f"PUSHSYMBOL {self.symbol_name}" - - def __eq__(self, other): - if not isinstance(other, PUSHSYMBOL): - return False - return self.symbol_name == other.symbol_name - - def __hash__(self): - return hash(self.symbol_name) - - class DATA_ITEM: def __init__(self, item: bytes | Label): self.data = item @@ -397,28 +374,6 @@ def resolve_symbols( elif isinstance(item, (PUSHLABEL, PUSHLABELJUMPDEST)): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits - elif isinstance(item, PUSHSYMBOL): - # Determine symbol type and appropriate size - symbol_name = item.symbol_name - - # Strip $ prefix if present for const references - if symbol_name.startswith("$"): - const_name = symbol_name[1:] - if CONSTREF(const_name) in symbol_map: - # Check if this is a label-dependent constant - if const_name in label_dependent_consts: - pc += SYMBOL_SIZE + 1 # PUSH2 for label-dependent constants - else: - # Calculate actual size for pure constants - val = symbol_map[CONSTREF(const_name)] - pc += calc_push_size(val) - else: - # Assume it will be a label-dependent constant - pc += SYMBOL_SIZE + 1 - else: - # It's either a label or will be resolved as one - pc += SYMBOL_SIZE + 1 # PUSH2 for labels - elif isinstance(item, PUSH_OFST): assert isinstance(item.ofst, int), item # [PUSH_OFST, (Label foo), bar] -> PUSH2 (foo+bar) @@ -660,54 +615,6 @@ def _assembly_to_evm( bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) ret.extend(bytecode) - elif 
isinstance(item, PUSHSYMBOL): - # Resolve the symbol and push appropriate value - symbol_name = item.symbol_name - - # Handle const references (starting with $) - if symbol_name.startswith("$"): - const_name = symbol_name[1:] - const_ref = CONSTREF(const_name) - if const_ref in symbol_map: - val = symbol_map[const_ref] - # Check if this is a label-dependent constant - if const_name in label_dependent_consts: - # Use PUSH2 for label-dependent constants - bytecode = _compile_push_instruction(PUSH_N(val, SYMBOL_SIZE)) - else: - # Use optimal size for pure constants - bytecode = _compile_push_instruction(PUSH(val)) - ret.extend(bytecode) - else: - raise CompilerPanic(f"Undefined constant reference: {symbol_name}") - else: - # Try as a label first - label = Label(symbol_name) - if label in symbol_map: - bytecode = _compile_push_instruction(PUSH_N(symbol_map[label], n=SYMBOL_SIZE)) - ret.extend(bytecode) - else: - # Try as a constant without $ prefix - const_ref = CONSTREF(symbol_name) - if const_ref in symbol_map: - val = symbol_map[const_ref] - # Check if this is a label-dependent constant - if symbol_name in label_dependent_consts: - # Use PUSH2 for label-dependent constants - bytecode = _compile_push_instruction(PUSH_N(val, SYMBOL_SIZE)) - else: - # Use optimal size for pure constants - bytecode = _compile_push_instruction(PUSH(val)) - ret.extend(bytecode) - else: - # Symbol not found. This will raise KeyError with the - # actual undefined symbol. - # This is the same behavior as PUSHLABEL. 
- bytecode = _compile_push_instruction( - PUSH_N(symbol_map[label], n=SYMBOL_SIZE) - ) - ret.extend(bytecode) - elif isinstance(item, JUMPDEST): jumpdest_opcode = get_opcodes()["JUMPDEST"][0] assert jumpdest_opcode is not None # help mypy From 96e723d61367040c3eae9f9f138b8b7cbfc9f4a4 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:33:56 +0300 Subject: [PATCH 162/172] rename is_symbol function to is_label for clarity and update references --- vyper/evm/assembler/core.py | 3 ++- vyper/evm/assembler/optimizer.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index daf0bd6aec..ec374e08d4 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -143,7 +143,8 @@ def mkdebug(pc_debugger, ast_source): return [i] -def is_symbol(i): +def is_label(i): + """Check if an item is a Label instance.""" return isinstance(i, Label) diff --git a/vyper/evm/assembler/optimizer.py b/vyper/evm/assembler/optimizer.py index a68714d755..af2e2b93dd 100644 --- a/vyper/evm/assembler/optimizer.py +++ b/vyper/evm/assembler/optimizer.py @@ -6,7 +6,7 @@ PUSHLABEL, PUSHLABELJUMPDEST, Label, - is_symbol, + is_label, ) from vyper.evm.assembler.symbols import CONSTREF, BaseConstOp from vyper.exceptions import CompilerPanic @@ -103,8 +103,8 @@ def _merge_jumpdests(assembly): changed = False i = 0 while i < len(assembly) - 2: - # if is_symbol(assembly[i]) and assembly[i + 1] == "JUMPDEST": - if is_symbol(assembly[i]): + # if is_label(assembly[i]) and assembly[i + 1] == "JUMPDEST": + if is_label(assembly[i]): current_symbol = assembly[i] # Skip merging if current symbol is used as data @@ -112,7 +112,7 @@ def _merge_jumpdests(assembly): i += 1 continue - if is_symbol(assembly[i + 1]): + if is_label(assembly[i + 1]): # LABEL x LABEL y # Only merge jump destinations, not data references new_symbol = assembly[i + 1] From f7a54bb60d9098386c01b1be5a0856dc4aa68c9f Mon Sep 17 
00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:34:28 +0300 Subject: [PATCH 163/172] cleanup unsused --- vyper/evm/assembler/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index ec374e08d4..c0907a88a2 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -194,7 +194,6 @@ def _resolve_constants( elif isinstance(item, BaseConstOp): all_const_names.add(item.name) - potential_label_refs = set() for item in assembly: if isinstance(item, BaseConstOp): # Check if any operand is a string that could be a label @@ -204,7 +203,6 @@ def _resolve_constants( if operand not in all_const_names: # This could be a label reference label_dependent_consts.add(item.name) - potential_label_refs.add(operand) max_iterations = 100 # Prevent infinite loops from circular dependencies iterations = 0 From d6bbf5f5c86e4bd1a3fcffb360f07049d9d44649 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:36:04 +0300 Subject: [PATCH 164/172] refactor --- vyper/evm/assembler/core.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index c0907a88a2..8d2f9db9a0 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -498,13 +498,12 @@ def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: def _compile_data_item(item: DATA_ITEM, symbol_map: dict[SymbolKey, int]) -> bytes: if isinstance(item.data, bytes): return item.data - if isinstance(item.data, Label): + elif isinstance(item.data, Label): if item.data not in symbol_map: raise CompilerPanic(f"Unresolved label in data section: {item.data}") - symbolbytes = symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") - return symbolbytes - - raise CompilerPanic(f"Invalid data {type(item.data)}, {item.data}") # pragma: nocover + return symbol_map[item.data].to_bytes(SYMBOL_SIZE, "big") + else: + raise 
CompilerPanic(f"Invalid data {type(item.data)}, {item.data}") # pragma: nocover # helper function From 808a019e3849769ece531272214b5cdf1d57d898 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:36:54 +0300 Subject: [PATCH 165/172] extract label-dependent constants into a separate function --- vyper/evm/assembler/core.py | 47 +++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 8d2f9db9a0..2977eb6bec 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -168,6 +168,34 @@ def _add_to_symbol_map(symbol_map: dict[SymbolKey, int], item: SymbolKey, value: symbol_map[item] = value +def _extract_label_dependent_constants( + assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int] +) -> set[str]: + """ + Extract constants that depend on labels from the assembly. + """ + label_dependent_consts = set() + for item in assembly: + if isinstance(item, BaseConstOp): + # Check if this constant references labels + for operand in [item.op1, item.op2]: + if isinstance(operand, str) and Label(operand) in symbol_map: + label_dependent_consts.add(item.name) + + # Propagate label dependency + changed = True + while changed: + changed = False + for item in assembly: + if isinstance(item, BaseConstOp) and item.name not in label_dependent_consts: + for operand in [item.op1, item.op2]: + if isinstance(operand, str) and operand in label_dependent_consts: + label_dependent_consts.add(item.name) + changed = True + + return label_dependent_consts + + def _resolve_constants( assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int] ) -> set[str]: @@ -555,24 +583,7 @@ def assembly_to_evm(assembly: list[AssemblyInstruction]) -> tuple[bytes, dict[st _validate_assembly_jumps(assembly, symbol_map) # Extract label-dependent constants from the assembly for bytecode generation - label_dependent_consts = set() - for item in 
assembly: - if isinstance(item, BaseConstOp): - # Check if this constant references labels - for operand in [item.op1, item.op2]: - if isinstance(operand, str) and Label(operand) in symbol_map: - label_dependent_consts.add(item.name) - - # Propagate label dependency - changed = True - while changed: - changed = False - for item in assembly: - if isinstance(item, BaseConstOp) and item.name not in label_dependent_consts: - for operand in [item.op1, item.op2]: - if isinstance(operand, str) and operand in label_dependent_consts: - label_dependent_consts.add(item.name) - changed = True + label_dependent_consts = _extract_label_dependent_constants(assembly, symbol_map) bytecode = _assembly_to_evm(assembly, symbol_map, label_dependent_consts) return bytecode, source_map From 3e5cc975c8c5291ef48b9b7eeec573a624d30435 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:37:42 +0300 Subject: [PATCH 166/172] implement _resolve_push_ofst_value function for PUSH_OFST offset resolution --- vyper/evm/assembler/core.py | 56 ++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 2977eb6bec..ad16f1ee71 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -547,6 +547,26 @@ def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes: return bytes(ret) +def _resolve_push_ofst_value( + item: PUSH_OFST, symbol_map: dict[SymbolKey, int] +) -> int: + """Resolve the offset value for a PUSH_OFST instruction.""" + if isinstance(item.label, Label): + return symbol_map[item.label] + item.ofst + + assert isinstance(item.label, CONSTREF) + const_name = item.label.label + + # Try to look up as a CONSTREF first + if item.label in symbol_map: + return symbol_map[item.label] + item.ofst + # If not found as CONSTREF, try as a Label + elif Label(const_name) in symbol_map: + return symbol_map[Label(const_name)] + item.ofst + else: + 
raise CompilerPanic(f"Unknown symbol: {const_name}") + + def _validate_assembly_jumps(assembly: list[AssemblyInstruction], symbol_map: dict[SymbolKey, int]): """ Validate assembly jumpdest and jump references for correctness before generating bytecode @@ -632,36 +652,26 @@ def _assembly_to_evm( elif isinstance(item, PUSH_OFST): # PUSH_OFST (LABEL foo) 32 # PUSH_OFST (const foo) 32 - if isinstance(item.label, Label): - ofst = symbol_map[item.label] + item.ofst - bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) - else: - assert isinstance(item.label, CONSTREF) + ofst = _resolve_push_ofst_value(item, symbol_map) + + # Determine if we need fixed size or optimal size + use_fixed_size = isinstance(item.label, Label) + if isinstance(item.label, CONSTREF): const_name = item.label.label - - # Try to look up as a CONSTREF first - if item.label in symbol_map: - ofst = symbol_map[item.label] + item.ofst - # If not found as CONSTREF, try as a Label - elif Label(const_name) in symbol_map: - ofst = symbol_map[Label(const_name)] + item.ofst - else: - raise CompilerPanic(f"Unknown symbol: {const_name}") - - # Check if this is a label-dependent constant if const_name in label_dependent_consts: - # Use PUSH2 for label-dependent constants - # Also validate the value fits in 16 bits + use_fixed_size = True + # Validate the value fits in 16 bits if ofst > 0xFFFF: raise CompilerPanic( f"PUSH_OFST with label-dependent constant '{const_name}' " f"has value {ofst} which exceeds 16-bit limit" ) - bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) - else: - # Use optimal size for non-label-dependent constants - bytecode = _compile_push_instruction(PUSH(ofst)) - + + if use_fixed_size: + bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) + else: + bytecode = _compile_push_instruction(PUSH(ofst)) + ret.extend(bytecode) elif isinstance(item, int): From b61c5f0ab54bda0bae19870817d154a800a20800 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 
2025 15:38:09 +0300 Subject: [PATCH 167/172] update left out is_symbol --- vyper/evm/assembler/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index ad16f1ee71..a74024f882 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -509,7 +509,7 @@ def get_data_segment_lengths(assembly: list[AssemblyInstruction]) -> list[int]: continue # Add to current segment length - if is_symbol(item.data): + if isinstance(item.data, Label): current_segment_length += SYMBOL_SIZE elif isinstance(item.data, bytes): current_segment_length += len(item.data) From 5cf496dba34ffb233a86a1b6bb386102f2518397 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:38:45 +0300 Subject: [PATCH 168/172] lint --- vyper/evm/assembler/core.py | 17 ++++++++--------- vyper/venom/venom_to_assembly.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index a74024f882..e7437a5d0b 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -86,6 +86,7 @@ def __eq__(self, other): def __hash__(self): return hash((self.label, self.ofst)) + class DATA_ITEM: def __init__(self, item: bytes | Label): self.data = item @@ -192,7 +193,7 @@ def _extract_label_dependent_constants( if isinstance(operand, str) and operand in label_dependent_consts: label_dependent_consts.add(item.name) changed = True - + return label_dependent_consts @@ -547,16 +548,14 @@ def _compile_push_instruction(assembly: list[AssemblyInstruction]) -> bytes: return bytes(ret) -def _resolve_push_ofst_value( - item: PUSH_OFST, symbol_map: dict[SymbolKey, int] -) -> int: +def _resolve_push_ofst_value(item: PUSH_OFST, symbol_map: dict[SymbolKey, int]) -> int: """Resolve the offset value for a PUSH_OFST instruction.""" if isinstance(item.label, Label): return symbol_map[item.label] + item.ofst - + assert isinstance(item.label, CONSTREF) 
const_name = item.label.label - + # Try to look up as a CONSTREF first if item.label in symbol_map: return symbol_map[item.label] + item.ofst @@ -653,7 +652,7 @@ def _assembly_to_evm( # PUSH_OFST (LABEL foo) 32 # PUSH_OFST (const foo) 32 ofst = _resolve_push_ofst_value(item, symbol_map) - + # Determine if we need fixed size or optimal size use_fixed_size = isinstance(item.label, Label) if isinstance(item.label, CONSTREF): @@ -666,12 +665,12 @@ def _assembly_to_evm( f"PUSH_OFST with label-dependent constant '{const_name}' " f"has value {ofst} which exceeds 16-bit limit" ) - + if use_fixed_size: bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) else: bytecode = _compile_push_instruction(PUSH(ofst)) - + ret.extend(bytecode) elif isinstance(item, int): diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index cb6635d499..ab83b53dc3 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -730,7 +730,7 @@ def _optimistic_swap(self, assembly, inst, next_liveness, stack): if not next_liveness: return - + next_scheduled = next_liveness.last() cost = 0 if not self.dfg.are_equivalent(inst.output, next_scheduled): From 580531ad31059da44cf1234f88921f25b5c3a04e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 15:46:26 +0300 Subject: [PATCH 169/172] refactor to dataclasses --- vyper/evm/assembler/symbols.py | 56 +++++++++++----------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py index fb79ef0714..03673e461f 100644 --- a/vyper/evm/assembler/symbols.py +++ b/vyper/evm/assembler/symbols.py @@ -1,55 +1,33 @@ -class Label: - def __init__(self, label: str): - assert isinstance(label, str) - self.label = label +from dataclasses import dataclass - def __repr__(self): - return f"LABEL {self.label}" - def __eq__(self, other): - if not isinstance(other, Label): - return False - return self.label == 
other.label +@dataclass(frozen=True) +class Label: + label: str - def __hash__(self): - return hash(self.label) + def __repr__(self) -> str: + return f"LABEL {self.label}" +@dataclass(frozen=True) class CONSTREF: - def __init__(self, label: str): - assert isinstance(label, str) - self.label = label + label: str - def __repr__(self): + def __repr__(self) -> str: return f"CONSTREF {self.label}" - def __eq__(self, other): - if not isinstance(other, CONSTREF): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - SymbolKey = Label | CONSTREF +@dataclass class CONST: - def __init__(self, name: str, value: int): - assert isinstance(name, str) - assert isinstance(value, int) - self.name = name - self.value = value + name: str + value: int - def __repr__(self): + def __repr__(self) -> str: return f"CONST {self.name} {self.value}" - def __eq__(self, other): - if not isinstance(other, CONST): - return False - return self.name == other.name and self.value == other.value - class BaseConstOp: def __init__(self, name: str, op1: str | int, op2: str | int): @@ -60,7 +38,7 @@ def __init__(self, name: str, op1: str | int, op2: str | int): self.op1 = op1 self.op2 = op2 - def __eq__(self, other): + def __eq__(self, other) -> bool: if not isinstance(other, type(self)): return False return self.name == other.name and self.op1 == other.op1 and self.op2 == other.op2 @@ -92,7 +70,7 @@ def _apply_operation(self, op1_val: int, op2_val: int) -> int: class CONST_ADD(BaseConstOp): - def __repr__(self): + def __repr__(self) -> str: return f"CONST_ADD {self.name} {self.op1} {self.op2}" def _apply_operation(self, op1_val: int, op2_val: int) -> int: @@ -100,7 +78,7 @@ def _apply_operation(self, op1_val: int, op2_val: int) -> int: class CONST_SUB(BaseConstOp): - def __repr__(self): + def __repr__(self) -> str: return f"CONST_SUB {self.name} {self.op1} {self.op2}" def _apply_operation(self, op1_val: int, op2_val: int) -> int: @@ -108,7 +86,7 @@ def 
_apply_operation(self, op1_val: int, op2_val: int) -> int: class CONST_MAX(BaseConstOp): - def __repr__(self): + def __repr__(self) -> str: return f"CONST_MAX {self.name} {self.op1} {self.op2}" def _apply_operation(self, op1_val: int, op2_val: int) -> int: From c4a12eb65402c66bb94e3bc87c7d1a3f3c4a074c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 16:04:00 +0300 Subject: [PATCH 170/172] more dataclass refactor /split out instructions.py --- vyper/evm/assembler/core.py | 164 ++-------------------------- vyper/evm/assembler/instructions.py | 124 +++++++++++++++++++++ vyper/ir/compile_ir.py | 9 +- 3 files changed, 139 insertions(+), 158 deletions(-) create mode 100644 vyper/evm/assembler/instructions.py diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index e7437a5d0b..6dccdb71f2 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -1,168 +1,28 @@ from typing import Any from vyper.evm.assembler.constants import DUP_OFFSET, PUSH_OFFSET, SWAP_OFFSET +from vyper.evm.assembler.instructions import ( + DATA_ITEM, + JUMPDEST, + PUSH, + PUSH_N, + PUSH_OFST, + PUSHLABEL, + PUSHLABELJUMPDEST, + AssemblyInstruction, + TaggedInstruction, +) from vyper.evm.assembler.symbols import CONST, CONSTREF, BaseConstOp, Label, SymbolKey -from vyper.evm.opcodes import get_opcodes, version_check +from vyper.evm.opcodes import get_opcodes from vyper.exceptions import CompilerPanic from vyper.utils import OrderedSet -def num_to_bytearray(x): - o = [] - while x > 0: - o.insert(0, x % 256) - x //= 256 - return o - - -class JUMPDEST: - def __init__(self, label: Label): - assert isinstance(label, Label), label - self.label = label - - def __repr__(self): - return f"JUMPDEST {self.label.label}" - - -class PUSHLABEL: - def __init__(self, label: Label): - assert isinstance(label, Label), f"invalid label {type(label)} {label}" - self.label = label - - def __repr__(self): - return f"PUSHLABEL {self.label.label}" - - def __eq__(self, 
other): - if not isinstance(other, PUSHLABEL): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - - -class PUSHLABELJUMPDEST: - """ - This is a special case of PUSHLABEL that is used to push a label - that is used in a jump or return address. This is used to allow - the optimizer to remove jumpdests that are not used. - """ - - def __init__(self, label: Label): - assert isinstance(label, Label), label - self.label = label - - def __repr__(self): - return f"PUSHLABELJUMPDEST {self.label.label}" - - def __eq__(self, other): - if not isinstance(other, PUSHLABELJUMPDEST): - return False - return self.label == other.label - - def __hash__(self): - return hash(self.label) - - -# push the result of an addition (which might be resolvable at compile-time) -class PUSH_OFST: - def __init__(self, label: Label | CONSTREF, ofst: int): - # label can be Label or CONSTREF - assert isinstance(label, (Label, CONSTREF)) - self.label = label - self.ofst = ofst - - def __repr__(self): - label = self.label - if isinstance(label, Label): - label = label.label # str - return f"PUSH_OFST({label}, {self.ofst})" - - def __eq__(self, other): - if not isinstance(other, PUSH_OFST): - return False - return self.label == other.label and self.ofst == other.ofst - - def __hash__(self): - return hash((self.label, self.ofst)) - - -class DATA_ITEM: - def __init__(self, item: bytes | Label): - self.data = item - - def __repr__(self): - if isinstance(self.data, bytes): - return f"DATABYTES {self.data.hex()}" - elif isinstance(self.data, Label): - return f"DATALABEL {self.data.label}" - - -# a string (assembly instruction) but with additional metadata from the source code -class TaggedInstruction(str): - def __new__(cls, sstr, *args, **kwargs): - return super().__new__(cls, sstr) - - def __init__(self, sstr, ast_source=None, error_msg=None): - self.error_msg = error_msg - self.pc_debugger = False - - self.ast_source = ast_source - - -def PUSH(x): - bs = 
num_to_bytearray(x) - # starting in shanghai, can do push0 directly with no immediates - if len(bs) == 0 and not version_check(begin="shanghai"): - bs = [0] - return [f"PUSH{len(bs)}"] + bs - - -# push an exact number of bytes -def PUSH_N(x, n): - o = [] - for _i in range(n): - o.insert(0, x % 256) - x //= 256 - assert x == 0 - return [f"PUSH{len(o)}"] + o - - -def JUMP(label: Label): - return [PUSHLABELJUMPDEST(label), "JUMP"] - - -def JUMPI(label: Label): - return [PUSHLABELJUMPDEST(label), "JUMPI"] - - -def mkdebug(pc_debugger, ast_source): - # compile debug instructions - # (this is dead code -- CMC 2025-05-08) - i = TaggedInstruction("DEBUG", ast_source) - i.pc_debugger = pc_debugger - return [i] - - def is_label(i): """Check if an item is a Label instance.""" return isinstance(i, Label) -AssemblyInstruction = ( - str - | TaggedInstruction - | int - | Label - | PUSHLABEL - | PUSHLABELJUMPDEST - | JUMPDEST - | PUSH_OFST - | DATA_ITEM - | CONST -) - - def _add_to_symbol_map(symbol_map: dict[SymbolKey, int], item: SymbolKey, value: int): if item in symbol_map: # pragma: nocover raise CompilerPanic(f"duplicate label: {item}") diff --git a/vyper/evm/assembler/instructions.py b/vyper/evm/assembler/instructions.py new file mode 100644 index 0000000000..4bff548803 --- /dev/null +++ b/vyper/evm/assembler/instructions.py @@ -0,0 +1,124 @@ +from dataclasses import dataclass + +from vyper.evm.assembler.symbols import CONST, CONSTREF, Label +from vyper.evm.opcodes import version_check + + +@dataclass +class JUMPDEST: + label: Label + + def __repr__(self) -> str: + return f"JUMPDEST {self.label.label}" + + +@dataclass(frozen=True) +class PUSHLABEL: + label: Label + + def __repr__(self) -> str: + return f"PUSHLABEL {self.label.label}" + + +@dataclass(frozen=True) +class PUSHLABELJUMPDEST: + """ + This is a special case of PUSHLABEL that is used to push a label + that is used in a jump or return address. 
This is used to allow + the optimizer to remove jumpdests that are not used. + """ + + label: Label + + def __repr__(self) -> str: + return f"PUSHLABELJUMPDEST {self.label.label}" + + +# push the result of an addition (which might be resolvable at compile-time) +@dataclass(frozen=True) +class PUSH_OFST: + label: Label | CONSTREF + ofst: int + + def __repr__(self) -> str: + # Both Label and CONSTREF have a .label attribute that is a string + label_str = self.label.label + return f"PUSH_OFST({label_str}, {self.ofst})" + + +@dataclass +class DATA_ITEM: + data: bytes | Label + + def __repr__(self) -> str: + if isinstance(self.data, bytes): + return f"DATABYTES {self.data.hex()}" + elif isinstance(self.data, Label): + return f"DATALABEL {self.data.label}" + + +def num_to_bytearray(x): + o = [] + while x > 0: + o.insert(0, x % 256) + x //= 256 + return o + + +# a string (assembly instruction) but with additional metadata from the source code +class TaggedInstruction(str): + def __new__(cls, sstr, *args, **kwargs): + return super().__new__(cls, sstr) + + def __init__(self, _sstr, ast_source=None, error_msg=None): + self.error_msg = error_msg + self.pc_debugger = False + self.ast_source = ast_source + + +def PUSH(x): + bs = num_to_bytearray(x) + # starting in shanghai, can do push0 directly with no immediates + if len(bs) == 0 and not version_check(begin="shanghai"): + bs = [0] + return [f"PUSH{len(bs)}"] + bs + + +# push an exact number of bytes +def PUSH_N(x, n): + o = [] + for _i in range(n): + o.insert(0, x % 256) + x //= 256 + assert x == 0 + return [f"PUSH{len(o)}"] + o + + +def JUMP(label: Label): + return [PUSHLABELJUMPDEST(label), "JUMP"] + + +def JUMPI(label: Label): + return [PUSHLABELJUMPDEST(label), "JUMPI"] + + +def mkdebug(pc_debugger, ast_source): + # compile debug instructions + # (this is dead code -- CMC 2025-05-08) + i = TaggedInstruction("DEBUG", ast_source) + i.pc_debugger = pc_debugger + return [i] + + +AssemblyInstruction = ( + str + | 
TaggedInstruction + | int + | Label + | PUSHLABEL + | PUSHLABELJUMPDEST + | JUMPDEST + | PUSH_OFST + | DATA_ITEM + | CONST +) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 32f8176fd7..3181b88a26 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -8,9 +8,8 @@ from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel -from vyper.evm.assembler.core import ( - CONST, - CONSTREF, +from vyper.evm.assembler.core import assembly_to_evm, get_data_segment_lengths +from vyper.evm.assembler.instructions import ( DATA_ITEM, JUMP, JUMPDEST, @@ -20,13 +19,11 @@ PUSHLABEL, PUSHLABELJUMPDEST, AssemblyInstruction, - Label, TaggedInstruction, - assembly_to_evm, - get_data_segment_lengths, mkdebug, ) from vyper.evm.assembler.optimizer import optimize_assembly +from vyper.evm.assembler.symbols import CONST, CONSTREF, Label from vyper.evm.opcodes import get_opcodes from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions From 93d49801393d264b2a82cb46afa14934b0293f25 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Fri, 18 Jul 2025 16:14:49 +0300 Subject: [PATCH 171/172] use `len() == 0` instead of `not` --- vyper/venom/venom_to_assembly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index ab83b53dc3..42b3708dfc 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -728,7 +728,7 @@ def _optimistic_swap(self, assembly, inst, next_liveness, stack): if DEBUG_SHOW_COST: stack0 = stack.copy() - if not next_liveness: + if len(next_liveness) == 0: return next_scheduled = next_liveness.last() From 55265df775abf4f139d7e2ef5a336784fe4eae33 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 1 Aug 2024 09:23:42 +0200 Subject: [PATCH 172/172] update test assertions empty db constructor jump error test add PC_RESET instruction pc reset stuff cleanup 
basicblock parser tag handling enhance unresolved constant check in VenomCompiler add CFG normalization check refactor stack handling refactor VenomCompiler to differentiate between constant references and labels in assembly generation import PC_RESET handle PC_RESET in resolve_symbols runtime actual position handling fix symbol map updates implement _resolve_push_ofst_value function for PUSH_OFST offset resolution --- tests/functional/venom/parser/test_parsing.py | 8 +- tests/functional/venom/test_empty_db.py | 39 ++++++ .../venom/test_runtime_deployment.py | 65 ++++++++++ vyper/evm/assembler/core.py | 116 +++++++++++++----- vyper/evm/assembler/instructions.py | 13 ++ vyper/evm/assembler/symbols.py | 14 ++- vyper/venom/basicblock.py | 4 +- vyper/venom/parser.py | 5 +- vyper/venom/venom_to_assembly.py | 52 ++++---- 9 files changed, 256 insertions(+), 60 deletions(-) create mode 100644 tests/functional/venom/test_empty_db.py create mode 100644 tests/functional/venom/test_runtime_deployment.py diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py index 34f1863bd6..c615189da2 100644 --- a/tests/functional/venom/parser/test_parsing.py +++ b/tests/functional/venom/parser/test_parsing.py @@ -410,12 +410,12 @@ def test_labels_with_addresses(): main_fn = ctx.get_function(IRLabel("main")) assert main_fn is not None - # Labels inside functions don't have addresses + # Labels inside functions are just regular labels main_bb = main_fn.get_basic_block("main") - assert main_bb.label.address is None + assert main_bb is not None other_bb = main_fn.get_basic_block("other_block") - assert other_bb.label.address is None + assert other_bb is not None def test_labels_with_addresses_used_in_function(): @@ -439,7 +439,7 @@ def test_labels_with_addresses_used_in_function(): assert main_fn is not None other_bb = main_fn.get_basic_block("other_block") - assert other_bb.label.address is None + assert other_bb is not None add_inst = 
other_bb.instructions[0] assert add_inst.opcode == "add" diff --git a/tests/functional/venom/test_empty_db.py b/tests/functional/venom/test_empty_db.py new file mode 100644 index 0000000000..c4b3ef7c98 --- /dev/null +++ b/tests/functional/venom/test_empty_db.py @@ -0,0 +1,39 @@ +from vyper.venom.parser import parse_venom + + +def test_empty_db_instruction(): + """Test that db x"" is accepted by the parser.""" + venom_code = """ + function test_data { + test_data: + db x"" + } + """ + + ctx = parse_venom(venom_code) + + from vyper.venom.basicblock import IRLabel + assert IRLabel("test_data") in ctx.functions + + fn = ctx.functions[IRLabel("test_data")] + bb = fn.get_basic_block("test_data") + assert len(bb.instructions) == 1 + assert bb.instructions[0].opcode == "db" + + +def test_db_with_data(): + venom_code = """ + function test_data { + test_data: + db x"deadbeef" + } + """ + + ctx = parse_venom(venom_code) + + from vyper.venom.basicblock import IRLabel + assert IRLabel("test_data") in ctx.functions + fn = ctx.functions[IRLabel("test_data")] + bb = fn.get_basic_block("test_data") + assert len(bb.instructions) == 1 + assert bb.instructions[0].opcode == "db" \ No newline at end of file diff --git a/tests/functional/venom/test_runtime_deployment.py b/tests/functional/venom/test_runtime_deployment.py new file mode 100644 index 0000000000..f44bd64617 --- /dev/null +++ b/tests/functional/venom/test_runtime_deployment.py @@ -0,0 +1,65 @@ +import pytest + +from vyper.evm.assembler import assembly_to_evm +from vyper.venom.parser import parse_venom +from vyper.venom.resolve_const import resolve_const_operands +from vyper.venom.venom_to_assembly import VenomCompiler + + +def test_runtime_size_storage_deployment(env): + """This test demonstrates an InvalidJump error that occurs with certain Venom code structures. 
+ """ + venom_code = """ +const RUNTIME_SIZE = sub(@runtime_end, @runtime) + +function __global { + __global: + invoke @constructor_StorageTest + %runtime_size = @RUNTIME_SIZE + %runtime_offset = @runtime + codecopy 0, %runtime_offset, %runtime_size + return 0, %runtime_size + + revert: [pinned] + revert 0, 0 +} ; close function __global + +function constructor_StorageTest { + constructor_StorageTest: + %1 = param + sstore 0, 42 + ret %1 +} ; close function constructor_StorageTest + +function runtime { + runtime: [pinned] + %1 = calldatasize + %2 = iszero %1 + jnz %2, @1_then, @2_join + + 1_then: + %3 = sload 0 + %value = %3 + mstore 0, %value + return 0, 32 + + 2_join: + revert 0, 0 +} ; close function runtime + +function runtime_end { + runtime_end: [pinned] + db x"" +} ; close function runtime_end +""" + + ctx = parse_venom(venom_code) + + resolve_const_operands(ctx) + + compiler = VenomCompiler(ctx) + assembly = compiler.generate_evm_assembly(no_optimize=True) + + bytecode, _ = assembly_to_evm(assembly) + + deployed_contract = env.deploy(abi=[], bytecode=bytecode) \ No newline at end of file diff --git a/vyper/evm/assembler/core.py b/vyper/evm/assembler/core.py index 6dccdb71f2..a834c2cf48 100644 --- a/vyper/evm/assembler/core.py +++ b/vyper/evm/assembler/core.py @@ -4,6 +4,7 @@ from vyper.evm.assembler.instructions import ( DATA_ITEM, JUMPDEST, + PC_RESET, PUSH, PUSH_N, PUSH_OFST, @@ -259,6 +260,66 @@ def resolve_symbols( elif isinstance(item, Label): _add_to_symbol_map(symbol_map, item, pc) + elif isinstance(item, PC_RESET): + # PC_RESET resets the program counter for calculating jump destinations + # Store the actual position for the runtime label specifically + # This is used when PUSH_OFST references @runtime for CODECOPY + # PC_RESET itself doesn't generate bytecode + + # Store the actual PC before resetting for size calculations + actual_pc = pc + + # Look ahead to find runtime-related labels and store their actual positions + # These are used for size 
calculations (e.g., RUNTIME_SIZE) + j = i + 1 + while j < len(assembly): + next_item = assembly[j] + if isinstance(next_item, JUMPDEST) and hasattr(next_item.label, 'label'): + label_name = next_item.label.label + if label_name == "runtime": + # Store actual position for runtime offset calculations + symbol_map[Label("__runtime_actual__")] = actual_pc + elif label_name == "runtime_end": + # Store actual position for size calculations + # runtime_end actual position = current actual_pc + bytes until runtime_end + # We need to count bytes from here to runtime_end + bytes_to_end = 0 + for k in range(i + 1, j + 1): + item_k = assembly[k] + if isinstance(item_k, JUMPDEST): + bytes_to_end += 1 + elif isinstance(item_k, (PUSHLABEL, PUSHLABELJUMPDEST)): + bytes_to_end += 3 # PUSH2 + elif isinstance(item_k, PUSH_OFST): + bytes_to_end += 3 # PUSH2 + elif isinstance(item_k, int): + bytes_to_end += 1 + elif isinstance(item_k, str) and item_k in get_opcodes(): + bytes_to_end += 1 + symbol_map[Label("__runtime_end_actual__")] = actual_pc + bytes_to_end + break + elif isinstance(next_item, Label): + if next_item.label == "runtime_end": + # Calculate actual position similarly + bytes_to_end = 0 + for k in range(i + 1, j): + item_k = assembly[k] + if isinstance(item_k, JUMPDEST): + bytes_to_end += 1 + elif isinstance(item_k, (PUSHLABEL, PUSHLABELJUMPDEST)): + bytes_to_end += 3 + elif isinstance(item_k, PUSH_OFST): + bytes_to_end += 3 + elif isinstance(item_k, int): + bytes_to_end += 1 + elif isinstance(item_k, str) and item_k in get_opcodes(): + bytes_to_end += 1 + symbol_map[Label("__runtime_end_actual__")] = actual_pc + bytes_to_end + break + j += 1 + + pc = item.value # Reset PC to specified value (usually 0) + elif isinstance(item, (PUSHLABEL, PUSHLABELJUMPDEST)): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits @@ -269,21 +330,8 @@ def resolve_symbols( if isinstance(item.label, Label): pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits elif isinstance(item.label, CONSTREF): - # 
Check if this constant depends on labels - const_name = item.label.label - if const_name in label_dependent_consts: - # Use fixed PUSH2 size for label-dependent constants - pc += SYMBOL_SIZE + 1 # PUSH2 highbits lowbits - else: - # For non-label-dependent constants, calculate actual size - # Try to look up as a CONSTREF first - if item.label in symbol_map: - const = symbol_map[item.label] - val = const + item.ofst - pc += calc_push_size(val) - else: - # Treat it as a label-dependent reference using PUSH2 size - pc += SYMBOL_SIZE + 1 # PUSH2 + # Always use PUSH2 size for CONSTREFs for now + pc += SYMBOL_SIZE + 1 # PUSH2 else: # pragma: nocover raise CompilerPanic(f"invalid ofst {item.label}") @@ -419,10 +467,22 @@ def _resolve_push_ofst_value(item: PUSH_OFST, symbol_map: dict[SymbolKey, int]) # Try to look up as a CONSTREF first if item.label in symbol_map: return symbol_map[item.label] + item.ofst - # If not found as CONSTREF, try as a Label + # If not found as CONSTREF, try as a Label (exact match) elif Label(const_name) in symbol_map: + # Special case: if this is the runtime label and we have stored its actual position, + # use the actual PC instead of the reset PC for offset calculations + if const_name == "runtime" and Label("__runtime_actual__") in symbol_map: + return symbol_map[Label("__runtime_actual__")] + item.ofst return symbol_map[Label(const_name)] + item.ofst else: + # Search for any Label with this name + for key in symbol_map: + if isinstance(key, Label) and key.label == const_name: + # Special case: if this is the runtime label and we have stored its actual position, + # use the actual PC instead of the reset PC for offset calculations + if const_name == "runtime" and Label("__runtime_actual__") in symbol_map: + return symbol_map[Label("__runtime_actual__")] + item.ofst + return symbol_map[key] + item.ofst raise CompilerPanic(f"Unknown symbol: {const_name}") @@ -496,6 +556,8 @@ def _assembly_to_evm( continue # CONST operations do not show up in 
bytecode elif isinstance(item, Label): continue # Label does not show up in bytecode + elif isinstance(item, PC_RESET): + continue # PC_RESET does not show up in bytecode elif isinstance(item, (PUSHLABEL, PUSHLABELJUMPDEST)): # push a symbol to stack @@ -513,20 +575,14 @@ def _assembly_to_evm( # PUSH_OFST (const foo) 32 ofst = _resolve_push_ofst_value(item, symbol_map) - # Determine if we need fixed size or optimal size - use_fixed_size = isinstance(item.label, Label) - if isinstance(item.label, CONSTREF): - const_name = item.label.label - if const_name in label_dependent_consts: - use_fixed_size = True - # Validate the value fits in 16 bits - if ofst > 0xFFFF: - raise CompilerPanic( - f"PUSH_OFST with label-dependent constant '{const_name}' " - f"has value {ofst} which exceeds 16-bit limit" - ) - - if use_fixed_size: + # Use fixed PUSH2 size for Labels and CONSTREFs to match symbol resolution + if isinstance(item.label, (Label, CONSTREF)): + # Validate the value fits in 16 bits + if ofst > 0xFFFF: + label_name = item.label.label if isinstance(item.label, CONSTREF) else str(item.label) + raise CompilerPanic( + f"PUSH_OFST with '{label_name}' has value {ofst} which exceeds PUSH2 limit" + ) bytecode = _compile_push_instruction(PUSH_N(ofst, SYMBOL_SIZE)) else: bytecode = _compile_push_instruction(PUSH(ofst)) diff --git a/vyper/evm/assembler/instructions.py b/vyper/evm/assembler/instructions.py index 4bff548803..73e53015ca 100644 --- a/vyper/evm/assembler/instructions.py +++ b/vyper/evm/assembler/instructions.py @@ -46,6 +46,19 @@ def __repr__(self) -> str: return f"PUSH_OFST({label_str}, {self.ofst})" +@dataclass +class PC_RESET: + """ + Special instruction to reset PC counter for runtime code sections. + This allows jump destinations within the section to be calculated + relative to the reset point rather than absolute positions. 
+ """ + value: int = 0 # The value to reset PC to (usually 0) + + def __repr__(self) -> str: + return f"PC_RESET {self.value}" + + @dataclass class DATA_ITEM: data: bytes | Label diff --git a/vyper/evm/assembler/symbols.py b/vyper/evm/assembler/symbols.py index 03673e461f..e9d0164034 100644 --- a/vyper/evm/assembler/symbols.py +++ b/vyper/evm/assembler/symbols.py @@ -49,10 +49,22 @@ def _resolve_operand(self, operand: str | int, symbol_map: dict[SymbolKey, int]) op_ref = CONSTREF(operand) if op_ref in symbol_map: return symbol_map[op_ref] - # Try as Label + + # Special handling for runtime labels in size calculations + # Use actual positions for RUNTIME_SIZE calculation + if operand == "runtime" and Label("__runtime_actual__") in symbol_map: + return symbol_map[Label("__runtime_actual__")] + elif operand == "runtime_end" and Label("__runtime_end_actual__") in symbol_map: + return symbol_map[Label("__runtime_end_actual__")] + + # Try as Label - first try exact match label = Label(operand) if label in symbol_map: return symbol_map[label] + # If not found, search for any Label with this name + for key in symbol_map: + if isinstance(key, Label) and key.label == operand: + return symbol_map[key] elif isinstance(operand, int): return operand return None diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 377e7a294e..4719ce4da1 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -219,13 +219,11 @@ class IRLabel(IROperand): # (like a function name, try to preserve it in optimization passes) is_symbol: bool = False value: str - address: Optional[int] = None # optional address override - def __init__(self, value: str, is_symbol: bool = False, address: Optional[int] = None) -> None: + def __init__(self, value: str, is_symbol: bool = False) -> None: assert isinstance(value, str), f"not a str: {value} ({type(value)})" assert len(value) > 0 self.is_symbol = is_symbol - self.address = address super().__init__(value) _IS_IDENTIFIER = 
re.compile("[0-9a-zA-Z_]*") diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py index 5a2beb2e20..41563329db 100644 --- a/vyper/venom/parser.py +++ b/vyper/venom/parser.py @@ -68,7 +68,7 @@ DOUBLE_QUOTE: "\\"" IDENT: (DIGIT|LETTER|"_")+ DB: "db" - HEXSTR: "x" DOUBLE_QUOTE (HEXDIGIT|"_")+ DOUBLE_QUOTE + HEXSTR: "x" DOUBLE_QUOTE (HEXDIGIT|"_")* DOUBLE_QUOTE CONST: SIGNED_INT | "0x" HEXDIGIT+ # Constant expressions @@ -263,6 +263,9 @@ def label_decl(self, children) -> _LabelDecl: # Skip NEWLINE tokens if hasattr(child, "type") and child.type == "NEWLINE": continue + elif hasattr(child, "data") and child.data == "tag_list": + # Transform the tag_list tree + tags = self.transform(child) elif isinstance(child, list): # tag_list returns a list tags = child diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 42b3708dfc..631487b6de 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -272,6 +272,13 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr for fn in self.ctx.functions.values(): ac = IRAnalysesCache(fn) + + # Run CFG normalization if needed + from vyper.venom.passes.cfg_normalization import CFGNormalization + cfg_check = ac.request_analysis(CFGAnalysis) + if not cfg_check.is_normalized(): + CFGNormalization(ac, fn).run_pass() + ac.invalidate_analysis(CFGAnalysis) self.liveness = ac.request_analysis(LivenessAnalysis) self.dfg = ac.request_analysis(DFGAnalysis) @@ -469,9 +476,12 @@ def clean_stack_from_cfg_in( self, asm: list, basicblock: IRBasicBlock, stack: StackModel ) -> None: # the input block is a splitter block, like jnz or djmp - assert len(in_bbs := self.cfg.cfg_in(basicblock)) == 1 + in_bbs = self.cfg.cfg_in(basicblock) + if len(in_bbs) != 1: + return in_bb = in_bbs.first() - assert len(self.cfg.cfg_out(in_bb)) > 1 + if len(self.cfg.cfg_out(in_bb)) <= 1: + return # inputs is the input variables we need from in_bb inputs = 
self.liveness.input_vars_from(in_bb, basicblock) @@ -525,10 +535,6 @@ def _generate_evm_for_instruction( operands = inst.operands[1:] elif opcode == "db": operands = [] - elif opcode == "revert": - # Filter out literals from revert operands for stack reordering - # since literals are handled directly in _emit_input_operands - operands = [op for op in inst.operands if not isinstance(op, IRLiteral)] else: operands = inst.operands @@ -569,22 +575,26 @@ def _generate_evm_for_instruction( assert len(self.cfg.cfg_out(inst.parent)) == 1 next_bb = self.cfg.cfg_out(inst.parent).first() - # guaranteed by cfg normalization+simplification - assert len(self.cfg.cfg_in(next_bb)) > 1 - - target_stack = self.liveness.input_vars_from(inst.parent, next_bb) - # NOTE: in general the stack can contain multiple copies of - # the same variable, however, before a jump that is not possible - self._stack_reorder(assembly, stack, list(target_stack)) - - if inst.is_commutative: - cost_no_swap = self._stack_reorder([], stack, operands, dry_run=True) - operands[-1], operands[-2] = operands[-2], operands[-1] - cost_with_swap = self._stack_reorder([], stack, operands, dry_run=True) + if len(self.cfg.cfg_in(next_bb)) > 1: + target_stack = self.liveness.input_vars_from(inst.parent, next_bb) + # NOTE: in general the stack can contain multiple copies of + # the same variable, however, before a jump that is not possible + self._stack_reorder(assembly, stack, list(target_stack)) + + # Filter out literals for stack reordering - they are not stack variables + stack_operands = [op for op in operands if not isinstance(op, IRLiteral)] + + if inst.is_commutative and len(stack_operands) >= 2: + cost_no_swap = self._stack_reorder([], stack, stack_operands, dry_run=True) + stack_operands[-1], stack_operands[-2] = stack_operands[-2], stack_operands[-1] + cost_with_swap = self._stack_reorder([], stack, stack_operands, dry_run=True) if cost_with_swap > cost_no_swap: + stack_operands[-1], stack_operands[-2] = 
stack_operands[-2], stack_operands[-1] + # Apply the same swap to the full operands list + if cost_with_swap <= cost_no_swap: operands[-1], operands[-2] = operands[-2], operands[-1] - cost = self._stack_reorder([], stack, operands, dry_run=True) + cost = self._stack_reorder([], stack, stack_operands, dry_run=True) if DEBUG_SHOW_COST and cost: print("ENTER", inst, file=sys.stderr) print(" HAVE", stack, file=sys.stderr) @@ -592,8 +602,8 @@ def _generate_evm_for_instruction( print(" COST", cost, file=sys.stderr) # final step to get the inputs to this instruction ordered - # correctly on the stack - self._stack_reorder(assembly, stack, operands) + # correctly on the stack (excluding literals which are not on stack) + self._stack_reorder(assembly, stack, stack_operands) # some instructions (i.e. invoke) need to do stack manipulations # with the stack model containing the return value(s), so we fiddle