diff --git a/Python/jit.c b/Python/jit.c index 01ec9c1fa6e8a9..a557f08701a6c1 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -167,11 +167,13 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, // See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions // for instruction encodings: -#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000) -#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000) -#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) -#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) -#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) +#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000) +#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000) +#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) +#define IS_AARCH64_BRANCH_COND(I) (((I) & 0x7C000000) == 0x54000000) +#define IS_AARCH64_TEST_AND_BRANCH(I) (((I) & 0x7E000000) == 0x36000000) +#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) +#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) // LLD is a great reference for performing relocations... just keep in // mind that Tools/jit/build.py does filtering and preprocessing for us! @@ -332,6 +334,37 @@ patch_aarch64_21rx(unsigned char *location, uint64_t value) patch_aarch64_21r(location, value); } + +// 21-bit relative branch. +void +patch_aarch64_19r(unsigned char *location, uint64_t value) +{ + uint32_t *loc32 = (uint32_t *)location; + assert(IS_AARCH64_BRANCH_COND(*loc32)); + value -= (uintptr_t)location; + // Check that we're not out of range of 21 signed bits: + assert((int64_t)value >= -(1 << 20)); + assert((int64_t)value < (1 << 20)); + // Since instructions are 4-byte aligned, only use 19 bits: + assert(get_bits(value, 0, 2) == 0); + set_bits(loc32, 5, value, 2, 19); +} + +// 16-bit relative branch. +void +patch_aarch64_14r(unsigned char *location, uint64_t value) +{ + uint32_t *loc32 = (uint32_t *)location; + assert(IS_AARCH64_TEST_AND_BRANCH(*loc32)); + value -= (uintptr_t)location; + // Check that we're not out of range of 16 signed bits: + assert((int64_t)value >= -(1 << 15)); + assert((int64_t)value < (1 << 15)); + // Since instructions are 4-byte aligned, only use 14 bits: + assert(get_bits(value, 0, 2) == 0); + set_bits(loc32, 5, value, 2, 14); +} + // 28-bit relative branch. void patch_aarch64_26r(unsigned char *location, uint64_t value) diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index 33db110b728dba..c950cb4908f39b 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -39,6 +39,34 @@ # Update with all of the inverted branches, too: _X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v} +_AARCH64_COND_CODES = { + # https://developer.arm.com/documentation/dui0801/b/CJAJIHAD?lang=en + "eq": "ne", + "ne": "eq", + "lt": "ge", + "ge": "lt", + "gt": "le", + "le": "gt", + "vs": "vc", + "vc": "vs", + "mi": "pl", + "pl": "mi", + "cs": "cc", + "cc": "cs", + "hs": "lo", + "lo": "hs", + "hi": "ls", + "ls": "hi", +} +# Branches are either b.{cond} or bc.{cond} +_AARCH64_BRANCHES = { + "b." + cond: ("b." + inverse if inverse else None) + for (cond, inverse) in _AARCH64_COND_CODES.items() +} | { + "bc." + cond: ("bc." + inverse if inverse else None) + for (cond, inverse) in _AARCH64_COND_CODES.items() +} + @dataclasses.dataclass class _Block: @@ -286,8 +314,15 @@ def run(self) -> None: class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods """aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu""" + _branches = _AARCH64_BRANCHES + _re_branch = re.compile( + rf"\s*(?P{'|'.join(_AARCH64_BRANCHES)})\s+(.+,\s+)*(?P[\w.]+)" + ) + # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch- _re_jump = re.compile(r"\s*b\s+(?P[\w.]+)") + # https://developer.arm.com/documentation/ddi0602/2025-09/Base-Instructions/RET--Return-from-subroutine- + _re_return = re.compile(r"\s*ret\b") class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index 228fc389584dd7..c47e9af924a20e 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -10,6 +10,7 @@ "ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_UNSIGNED", "IMAGE_REL_AMD64_REL32", + "IMAGE_REL_ARM64_BRANCH19", "IMAGE_REL_ARM64_BRANCH26", "IMAGE_REL_ARM64_PAGEBASE_REL21", "IMAGE_REL_ARM64_PAGEOFFSET_12A", @@ -20,6 +21,7 @@ "R_AARCH64_ADR_GOT_PAGE", "R_AARCH64_ADR_PREL_PG_HI21", "R_AARCH64_CALL26", + "R_AARCH64_CONDBR19", "R_AARCH64_JUMP26", "R_AARCH64_ADD_ABS_LO12_NC", "R_AARCH64_LD64_GOT_LO12_NC", diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 14606b036db519..16bc1ea4e17e6b 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -61,6 +61,7 @@ class HoleValue(enum.Enum): # x86_64-pc-windows-msvc: "IMAGE_REL_AMD64_REL32": "patch_x86_64_32rx", # aarch64-pc-windows-msvc: + "IMAGE_REL_ARM64_BRANCH19": "patch_aarch64_19r", "IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r", "IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx", "IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12", @@ -74,6 +75,7 @@ class HoleValue(enum.Enum): "R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx", "R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r", "R_AARCH64_CALL26": "patch_aarch64_26r", + "R_AARCH64_CONDBR19": "patch_aarch64_19r", "R_AARCH64_JUMP26": "patch_aarch64_26r", "R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x", "R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a", diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 2f3969e7d0540c..d723916be8500f 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -324,7 +324,8 @@ def _handle_relocation( "Offset": offset, "Symbol": s, "Type": { - "Name": "IMAGE_REL_ARM64_BRANCH26" + "Name": "IMAGE_REL_ARM64_BRANCH19" + | "IMAGE_REL_ARM64_BRANCH26" | "IMAGE_REL_ARM64_PAGEBASE_REL21" | "IMAGE_REL_ARM64_PAGEOFFSET_12A" | "IMAGE_REL_ARM64_PAGEOFFSET_12L" as kind