Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Optimize the AArch64 code generation for the JIT. Patch by Diego Russo.
30 changes: 22 additions & 8 deletions Tools/jit/_stencils.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,24 @@ def pad(self, alignment: int) -> None:
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
self.body.extend([0] * padding)

def remove_jump(self, *, alignment: int = 1) -> None:
def add_nops(self, nop: bytes, alignment: int) -> None:
    """Add NOPs until there is alignment. Fail if it is not possible.

    ``nop`` is the byte encoding of a single no-op instruction and
    ``alignment`` is the multiple that ``len(self.body)`` must reach.
    Nothing is appended when the body is already aligned, so an empty
    ``nop`` is accepted in that case.

    Raises:
        ValueError: if ``alignment`` is not positive, or if the gap to the
            next aligned offset cannot be filled with a whole number of
            NOPs (which includes an empty ``nop`` with a non-zero gap).
    """
    if alignment <= 0:
        # A zero alignment would divide by zero below and a negative one
        # would silently leave the body unaligned.
        raise ValueError(f"Alignment must be positive, not '{alignment}'")

    offset = len(self.body)
    nop_size = len(nop)

    # Calculate the gap to the next multiple of alignment.
    gap = -offset % alignment
    if not gap:
        return

    # Check for an empty NOP first: ``gap % 0`` would raise
    # ZeroDivisionError instead of the intended ValueError.
    if nop_size == 0 or gap % nop_size != 0:
        raise ValueError(
            f"Cannot add nops of size '{nop_size}' to a body with "
            f"offset '{offset}' to align with '{alignment}'"
        )
    self.body.extend(nop * (gap // nop_size))

def remove_jump(self) -> None:
"""Remove a zero-length continuation jump, if it exists."""
hole = max(self.holes, key=lambda hole: hole.offset)
match hole:
Expand Down Expand Up @@ -244,7 +261,7 @@ def remove_jump(self, *, alignment: int = 1) -> None:
jump = b"\x00\x00\x00\x14"
case _:
return
if self.body[offset:] == jump and offset % alignment == 0:
if self.body[offset:] == jump:
self.body = self.body[:offset]
self.holes.remove(hole)

Expand All @@ -266,10 +283,7 @@ class StencilGroup:
_trampolines: set[int] = dataclasses.field(default_factory=set, init=False)

def process_relocations(
self,
known_symbols: dict[str, int],
*,
alignment: int = 1,
self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
) -> None:
"""Fix up all GOT and internal relocations for this stencil group."""
for hole in self.code.holes.copy():
Expand All @@ -289,8 +303,8 @@ def process_relocations(
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
self.code.remove_jump(alignment=alignment)
self.code.pad(alignment)
self.code.remove_jump()
self.code.add_nops(nop=nop, alignment=alignment)
self.data.pad(8)
for stencil in [self.code, self.data]:
for hole in stencil.holes:
Expand Down
13 changes: 12 additions & 1 deletion Tools/jit/_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]):
verbose: bool = False
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)

def _get_nop(self) -> bytes:
    """Return the NOP byte sequence selected by ``self.triple``.

    The triple is matched in full against each known pattern in order and
    the bytes paired with the first match are returned.

    Raises:
        ValueError: if the triple matches none of the known patterns.
    """
    # (full-match pattern for the triple, NOP bytes to return)
    nops_by_pattern = (
        (r"aarch64-.*", b"\x1f\x20\x03\xD5"),
        (r"x86_64-.*|i686.*", b"\x90"),
    )
    for pattern, encoding in nops_by_pattern:
        if re.fullmatch(pattern, self.triple):
            return encoding
    raise ValueError(f"NOP not defined for {self.triple}")

def _compute_digest(self, out: pathlib.Path) -> str:
hasher = hashlib.sha256()
hasher.update(self.triple.encode())
Expand Down Expand Up @@ -172,7 +181,9 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
stencil_groups = {task.get_name(): task.result() for task in tasks}
for stencil_group in stencil_groups.values():
stencil_group.process_relocations(
known_symbols=self.known_symbols, alignment=self.alignment
known_symbols=self.known_symbols,
alignment=self.alignment,
nop=self._get_nop(),
)
return stencil_groups

Expand Down
Loading