Skip to content

Commit 4abdaa1

Browse files
committed
Implement assembler optimization for AArch64.
Removes a number of unnecessary branches. Change-Id: I4965fe8d8b79f7d859ff9076d9c53f3ac7f094b2
1 parent f3d7fae commit 4abdaa1

File tree

4 files changed

+84
-5
lines changed

4 files changed

+84
-5
lines changed

Python/jit.c

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,13 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
167167

168168
// See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions
169169
// for instruction encodings:
170-
#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000)
171-
#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000)
172-
#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000)
173-
#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000)
174-
#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000)
170+
#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000)
171+
#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000)
172+
#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000)
173+
#define IS_AARCH64_BRANCH_COND(I) (((I) & 0x7C000000) == 0x54000000)
174+
#define IS_AARCH64_TEST_AND_BRANCH(I) (((I) & 0x7E000000) == 0x36000000)
175+
#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000)
176+
#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000)
175177

176178
// LLD is a great reference for performing relocations... just keep in
177179
// mind that Tools/jit/build.py does filtering and preprocessing for us!
@@ -332,6 +334,37 @@ patch_aarch64_21rx(unsigned char *location, uint64_t value)
332334
patch_aarch64_21r(location, value);
333335
}
334336

337+
338+
// 21-bit relative branch.
//
// Patch the branch instruction at `location` so that it targets the absolute
// address `value`. The displacement is stored in the 19-bit immediate field
// that starts at bit 5 of the instruction word.
void
patch_aarch64_19r(unsigned char *location, uint64_t value)
{
    uint32_t *loc32 = (uint32_t *)location;
    // This relocation (R_AARCH64_CONDBR19) is used for B.cond/BC.cond *and*
    // for CBZ/CBNZ, which share the same imm19 field. CBZ/CBNZ encode as
    // 0x34000000 under the mask 0x7E000000, so accept both families:
    assert(IS_AARCH64_BRANCH_COND(*loc32) ||
           ((*loc32 & 0x7E000000) == 0x34000000));
    // Turn the absolute target into a displacement from this instruction:
    value -= (uintptr_t)location;
    // Check that we're not out of range of 21 signed bits:
    assert((int64_t)value >= -(1 << 20));
    assert((int64_t)value < (1 << 20));
    // Since instructions are 4-byte aligned, only use 19 bits:
    assert(get_bits(value, 0, 2) == 0);
    // Copy bits 2..20 of the displacement into bits 5..23 of the instruction:
    set_bits(loc32, 5, value, 2, 19);
}
352+
353+
// 16-bit relative branch.
//
// Patch the test-and-branch instruction at `location` so that it targets the
// absolute address `value`. The displacement is stored in the 14-bit
// immediate field that starts at bit 5 of the instruction word.
void
patch_aarch64_14r(unsigned char *location, uint64_t value)
{
    uint32_t *insn = (uint32_t *)location;
    assert(IS_AARCH64_TEST_AND_BRANCH(*insn));
    // Displacement of the target relative to the instruction being patched:
    const uint64_t delta = value - (uintptr_t)location;
    // The displacement must fit in 16 signed bits...
    assert((int64_t)delta >= -(1 << 15));
    assert((int64_t)delta < (1 << 15));
    // ...and be 4-byte aligned, so only the upper 14 of those bits are stored:
    assert(get_bits(delta, 0, 2) == 0);
    set_bits(insn, 5, delta, 2, 14);
}
367+
335368
// 28-bit relative branch.
336369
void
337370
patch_aarch64_26r(unsigned char *location, uint64_t value)

Tools/jit/_optimizers.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,41 @@
3939
# Update with all of the inverted branches, too:
4040
_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v}
4141

42+
# Every AArch64 condition code, mapped to the code testing the opposite
# condition. Built from (code, inverse) pairs so each pair is listed once:
# https://developer.arm.com/documentation/dui0801/b/CJAJIHAD?lang=en
_AARCH64_COND_CODES = dict(
    entry
    for code, opposite in (
        ("eq", "ne"),
        ("lt", "ge"),
        ("gt", "le"),
        ("vs", "vc"),
        ("mi", "pl"),
        ("cs", "cc"),
        ("hs", "lo"),
        ("hi", "ls"),
    )
    for entry in ((code, opposite), (opposite, code))
)
# Conditional branches are spelled either b.{cond} or bc.{cond}; each maps to
# the same mnemonic with the inverted condition code:
_AARCH64_BRANCHES = {
    f"{mnemonic}.{cond}": (f"{mnemonic}.{inverse}" if inverse else None)
    for mnemonic in ("b", "bc")
    for cond, inverse in _AARCH64_COND_CODES.items()
}
# Plus the two compare-and-branch (cbz/cbnz) and two test-and-branch
# (tbz/tbnz) instructions, each paired with its inverse:
_AARCH64_BRANCHES.update(cbz="cbnz", cbnz="cbz", tbz="tbnz", tbnz="tbz")
76+
4277

4378
@dataclasses.dataclass
4479
class _Block:
@@ -286,8 +321,15 @@ def run(self) -> None:
286321
class OptimizerAArch64(Optimizer):  # pylint: disable = too-few-public-methods
    """aarch64-apple-darwin/aarch64-pc-windows-msvc/aarch64-unknown-linux-gnu"""

    # Maps each conditional branch mnemonic to its inverted form:
    _branches = _AARCH64_BRANCHES
    # Matches a conditional branch: the mnemonic, any comma-separated operands
    # (e.g. the register of cbz, or the register and bit number of tbz), and
    # finally the target label. Mnemonics such as "b.eq" contain a literal ".",
    # so they are escaped; otherwise the dot would match any character and the
    # captured "instruction" might not be a key of _branches.
    _re_branch = re.compile(
        rf"\s*(?P<instruction>{'|'.join(map(re.escape, _AARCH64_BRANCHES))})"
        r"\s+(.+,\s+)*(?P<target>[\w.]+)"
    )

    # Unconditional branch to a label:
    # https://developer.arm.com/documentation/ddi0602/2025-03/Base-Instructions/B--Branch-
    _re_jump = re.compile(r"\s*b\s+(?P<target>[\w.]+)")
    # Return from subroutine:
    # https://developer.arm.com/documentation/ddi0602/2025-09/Base-Instructions/RET--Return-from-subroutine-
    _re_return = re.compile(r"\s*ret\b")
291333

292334

293335
class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods

Tools/jit/_schema.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@
2020
"R_AARCH64_ADR_GOT_PAGE",
2121
"R_AARCH64_ADR_PREL_PG_HI21",
2222
"R_AARCH64_CALL26",
23+
"R_AARCH64_CONDBR19",
2324
"R_AARCH64_JUMP26",
2425
"R_AARCH64_ADD_ABS_LO12_NC",
2526
"R_AARCH64_LD64_GOT_LO12_NC",
2627
"R_AARCH64_MOVW_UABS_G0_NC",
2728
"R_AARCH64_MOVW_UABS_G1_NC",
2829
"R_AARCH64_MOVW_UABS_G2_NC",
2930
"R_AARCH64_MOVW_UABS_G3",
31+
"R_AARCH64_TSTBR14",
3032
"R_X86_64_64",
3133
"R_X86_64_GOTPCREL",
3234
"R_X86_64_GOTPCRELX",

Tools/jit/_stencils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,14 @@ class HoleValue(enum.Enum):
7474
"R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx",
7575
"R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r",
7676
"R_AARCH64_CALL26": "patch_aarch64_26r",
77+
"R_AARCH64_CONDBR19": "patch_aarch64_19r",
7778
"R_AARCH64_JUMP26": "patch_aarch64_26r",
7879
"R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x",
7980
"R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a",
8081
"R_AARCH64_MOVW_UABS_G1_NC": "patch_aarch64_16b",
8182
"R_AARCH64_MOVW_UABS_G2_NC": "patch_aarch64_16c",
8283
"R_AARCH64_MOVW_UABS_G3": "patch_aarch64_16d",
84+
"R_AARCH64_TSTBR14": "patch_aarch64_14r",
8385
# x86_64-unknown-linux-gnu:
8486
"R_X86_64_64": "patch_64",
8587
"R_X86_64_GOTPCRELX": "patch_x86_64_32rx",

0 commit comments

Comments
 (0)