diff --git a/Lib/test/test_jit_stencils.py b/Lib/test/test_jit_stencils.py
new file mode 100644
index 00000000000000..db836272353bb6
--- /dev/null
+++ b/Lib/test/test_jit_stencils.py
@@ -0,0 +1,98 @@
+import asyncio
+import pathlib
+import shlex
+import sysconfig
+import tempfile
+import test.support
+import test.test_tools
+import test.support.script_helper
+import unittest
+
+_CPYTHON = pathlib.Path(test.support.REPO_ROOT).resolve()
+_TOOLS_JIT = _CPYTHON / "Tools" / "jit"
+_TOOLS_JIT_TEST = _TOOLS_JIT / "test"
+_TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H = _TOOLS_JIT_TEST / "test_executor_cases.c.h"
+_TOOLS_JIT_BUILD_PY = _TOOLS_JIT / "build.py"
+
+# Skip this test if either the JIT build scripts or the needed LLVM utilities
+# are missing:
+test.test_tools.skip_if_missing("jit")
+with test.test_tools.imports_under_tool("jit"):
+    import _llvm
+for tool in ["clang", "llvm-objdump", "llvm-readobj"]:
+    if not asyncio.run(_llvm._find_tool(tool)):
+        raise unittest.SkipTest(f"{tool} {_llvm._LLVM_VERSION} isn't installed.")
+
+@test.support.cpython_only
+@unittest.skipIf(test.support.Py_DEBUG, "Debug stencils aren't tested.")
+@unittest.skipIf(test.support.Py_GIL_DISABLED, "Free-threaded stencils aren't tested.")
+class TestJITStencils(unittest.TestCase):
+    """Check freshly built JIT stencils against the checked-in expected headers."""
+
+    def _build_jit_stencils(self, target: str) -> str:
+        """Build the test stencils for *target* and return the generated header.
+
+        Runs Tools/jit/build.py on the test executor cases, writing into a
+        temporary directory, and returns the header text with its first two
+        lines removed. The current (sub)test is skipped if the build fails.
+        """
+        with tempfile.TemporaryDirectory() as work:
+            jit_stencils_h = pathlib.Path(work, f"jit_stencils-{target}.h").resolve()
+            pyconfig_h = pathlib.Path(sysconfig.get_config_h_filename()).resolve()
+            result, args = test.support.script_helper.run_python_until_end(
+                _TOOLS_JIT_BUILD_PY,
+                "--input-file", _TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H,
+                "--output-dir", jit_stencils_h.parent,
+                "--pyconfig-dir", pyconfig_h.parent,
+                target,
+                __isolated=False,
+                # Windows leaks temporary files on failure because the JIT build
+                # process is async. This forces it to be "sync" for this test:
+                PYTHON_CPU_COUNT="1",
+            )
+            if result.rc:
+                self.skipTest(f"Build failed: {shlex.join(map(str, args))}")
+            body = jit_stencils_h.read_text()
+        # Strip out two lines of header comments:
+        _, _, body = body.split("\n", 2)
+        return body
+
+    def _check_jit_stencils(
+        self, expected: str, actual: str, test_jit_stencils_h: pathlib.Path
+    ) -> None:
+        """Assert that *actual* matches *expected*, ignoring outer newlines.
+
+        On a mismatch, the AssertionError is re-raised with notes naming the
+        expected file (relative to the repository root) and containing the
+        full actual output, so the file can be updated by hand.
+        """
+        try:
+            self.assertEqual(expected.strip("\n"), actual.strip("\n"))
+        except AssertionError as e:
+            # Make it easy to re-validate the expected output:
+            relative = test_jit_stencils_h.relative_to(_CPYTHON)
+            message = f"If this is expected, replace {relative} with:"
+            banner = "=" * len(message)
+            e.add_note("\n".join([banner, message, banner]))
+            e.add_note(actual)
+            raise
+
+    def test_jit_stencils(self):
+        self.maxDiff = None
+        found = False
+        # Sort the expected headers so that subtests run (and are reported) in
+        # the same order everywhere; glob() order is filesystem-dependent:
+        expected_headers = sorted(_TOOLS_JIT_TEST.glob("test_jit_stencils-*.h"))
+        for test_jit_stencils_h in expected_headers:
+            target = test_jit_stencils_h.stem.removeprefix("test_jit_stencils-")
+            with self.subTest(target):
+                expected = test_jit_stencils_h.read_text()
+                actual = self._build_jit_stencils(target)
+                # Only reached if the build wasn't skipped:
+                found = True
+                self._check_jit_stencils(expected, actual, test_jit_stencils_h)
+        self.assertTrue(found, "No JIT stencils built!")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py
index 228fc389584dd7..8f96305dbfa979 100644
--- a/Tools/jit/_schema.py
+++ b/Tools/jit/_schema.py
@@ -101,7 +101,8 @@ class ELFSection(typing.TypedDict):
     Index: int
     Info: int
     Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]]
-    SectionData: dict[typing.Literal["Bytes"], list[int]]
+    SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
+    Size: int
     Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]]
     Type: dict[typing.Literal["Name"], str]
 
@@ -117,4 +118,6 @@ class MachOSection(typing.TypedDict):
         list[dict[typing.Literal["Relocation"], MachORelocation]]
     ]
     SectionData: 
typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] + Segment: dict[typing.Literal["Value"], str] + Size: int Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]] diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 1d82f5366f6ce0..840bf312383aa7 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -19,12 +19,16 @@ class HoleValue(enum.Enum): CODE = enum.auto() # The base address of the read-only data for this uop: DATA = enum.auto() + # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET): + ERROR_TARGET = enum.auto() # The address of the current executor (exposed as _JIT_EXECUTOR): EXECUTOR = enum.auto() # The base address of the "global" offset table located in the read-only data. # Shouldn't be present in the final stencils, since these are all replaced with # equivalent DATA values: GOT = enum.auto() + # The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET): + JUMP_TARGET = enum.auto() # The current uop's oparg (exposed as _JIT_OPARG): OPARG = enum.auto() # The current uop's operand0 on 64-bit platforms (exposed as _JIT_OPERAND0): @@ -39,10 +43,9 @@ class HoleValue(enum.Enum): OPERAND1_LO = enum.auto() # The current uop's target (exposed as _JIT_TARGET): TARGET = enum.auto() - # The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET): - JUMP_TARGET = enum.auto() - # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET): - ERROR_TARGET = enum.auto() + # Writable data, which we don't support! 
Optimistically remove their data + # from the stencil, and raise later if they're actually used: + WRITABLE = enum.auto() # A hardcoded value of zero (used for symbol lookups): ZERO = enum.auto() @@ -96,9 +99,11 @@ class HoleValue(enum.Enum): _HOLE_EXPRS = { HoleValue.CODE: "(uintptr_t)code", HoleValue.DATA: "(uintptr_t)data", + HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]", HoleValue.EXECUTOR: "(uintptr_t)executor", # These should all have been turned into DATA values by process_relocations: # HoleValue.GOT: "", + HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]", HoleValue.OPARG: "instruction->oparg", HoleValue.OPERAND0: "instruction->operand0", HoleValue.OPERAND0_HI: "(instruction->operand0 >> 32)", @@ -107,8 +112,8 @@ class HoleValue(enum.Enum): HoleValue.OPERAND1_HI: "(instruction->operand1 >> 32)", HoleValue.OPERAND1_LO: "(instruction->operand1 & UINT32_MAX)", HoleValue.TARGET: "instruction->target", - HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]", - HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]", + # These should all have raised an error if they were actually used: + # HoleValue.WRITABLE: "", HoleValue.ZERO: "", } @@ -246,6 +251,12 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None: self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: + if hole.symbol in self.symbols: + value, _ = self.symbols[hole.symbol] + if value is HoleValue.WRITABLE: + raise ValueError( + f"Writable data ({hole.symbol}) is not supported!" 
+ ) if hole.value is HoleValue.GOT: assert hole.symbol is not None hole.value = HoleValue.DATA diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 3883671e92aa39..29c5cad9e7eedc 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -51,6 +51,7 @@ class _Target(typing.Generic[_S, _R]): verbose: bool = False cflags: str = "" known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + input_file: pathlib.Path = PYTHON_EXECUTOR_CASES_C_H pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() def _get_nop(self) -> bytes: @@ -68,7 +69,7 @@ def _compute_digest(self) -> str: hasher.update(self.debug.to_bytes()) hasher.update(self.cflags.encode()) # These dependencies are also reflected in _JITSources in regen.targets: - hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) + hasher.update(self.input_file.read_bytes()) hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes()) for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): for filename in filenames: @@ -82,10 +83,15 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: if output is not None: # Make sure that full paths don't leak out (for reproducibility): long, short = str(path), str(path.name) - group.code.disassembly.extend( - line.expandtabs().strip().replace(long, short) - for line in output.splitlines() - ) + lines = output.splitlines() + started = False + for line in lines: + if line.lstrip().startswith("0:"): + started = True + if started: + cleaned = line.replace(long, short).expandtabs().strip() + if cleaned: + group.code.disassembly.append(cleaned) args = [ "--elf-output-style=JSON", "--expand-relocs", @@ -181,10 +187,12 @@ async def _compile( return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: - generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() + generated_cases = self.input_file.read_text() cases_and_opnames = sorted( re.findall( - r"\n {8}(case (\w+): \{\n.*?\n {8}\})", 
generated_cases, flags=re.DOTALL + r"^ {8}(case (\w+): \{\n.*?\n {8}\})", + generated_cases, + flags=re.DOTALL | re.MULTILINE, ) ) tasks = [] @@ -260,7 +268,7 @@ def _handle_section( if "SectionData" in section: section_data_bytes = section["SectionData"]["Bytes"] else: - # Zeroed BSS data, seen with printf debugging calls: + # Zeroed BSS data: section_data_bytes = [0] * section["RawDataSize"] if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE @@ -270,6 +278,10 @@ def _handle_section( stencil = group.data else: return + if "IMAGE_SCN_MEM_WRITE" in flags: + assert value is _stencils.HoleValue.DATA + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] base = len(stencil.body) group.symbols[section["Number"]] = value, base stencil.body.extend(section_data_bytes) @@ -372,29 +384,39 @@ def _handle_section( if value is _stencils.HoleValue.CODE: stencil = group.code else: - assert value is _stencils.HoleValue.DATA + assert value in (_stencils.HoleValue.DATA, _stencils.HoleValue.WRITABLE) stencil = group.data for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) - elif section_type == "SHT_PROGBITS": + elif section_type in {"SHT_PROGBITS", "SHT_NOBITS"}: if "SHF_ALLOC" not in flags: return + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] if "SHF_EXECINSTR" in flags: value = _stencils.HoleValue.CODE stencil = group.code else: value = _stencils.HoleValue.DATA stencil = group.data - group.symbols[section["Index"]] = value, len(stencil.body) + if "SHF_WRITE" in flags: + assert value is _stencils.HoleValue.DATA + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] + base = len(stencil.body) + group.symbols[section["Index"]] = value, base + stencil.body.extend(section_data_bytes) for wrapped_symbol 
in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = len(stencil.body) + symbol["Value"] + offset = base + symbol["Value"] name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset - stencil.body.extend(section["SectionData"]["Bytes"]) assert not section["Relocations"] else: assert section_type in { @@ -452,33 +474,35 @@ class _MachO( def _handle_section( self, section: _schema.MachOSection, group: _stencils.StencilGroup ) -> None: - assert section["Address"] >= len(group.code.body) - assert "SectionData" in section + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data: + section_data_bytes = [0] * section["Size"] flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} - name = section["Name"]["Value"] - name = name.removeprefix(self.symbol_prefix) if "Debug" in flags: return if "PureInstructions" in flags: value = _stencils.HoleValue.CODE stencil = group.code - start_address = 0 - group.symbols[name] = value, section["Address"] - start_address else: value = _stencils.HoleValue.DATA stencil = group.data - start_address = len(group.code.body) - group.symbols[name] = value, len(group.code.body) - base = section["Address"] - start_address + segment = section["Segment"]["Value"] + assert segment in {"__DATA", "__TEXT"}, segment + if segment == "__DATA": + value = _stencils.HoleValue.WRITABLE + section_data_bytes = [] + base = len(stencil.body) group.symbols[section["Index"]] = value, base - stencil.body.extend( - [0] * (section["Address"] - len(group.code.body) - len(group.data.body)) - ) - stencil.body.extend(section["SectionData"]["Bytes"]) + stencil.body.extend(section_data_bytes) + name = section["Name"]["Value"] + name = name.removeprefix(self.symbol_prefix) + group.symbols[name] = value, base assert "Symbols" in section for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] - offset = symbol["Value"] - 
start_address + offset = base + symbol["Value"] - section["Address"] name = symbol["Name"]["Name"] name = name.removeprefix(self.symbol_prefix) group.symbols[name] = value, offset @@ -557,38 +581,45 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO: optimizer: type[_optimizers.Optimizer] target: _COFF32 | _COFF64 | _ELF | _MachO if re.fullmatch(r"aarch64-apple-darwin.*", host): + host = "aarch64-apple-darwin" condition = "defined(__aarch64__) && defined(__APPLE__)" optimizer = _optimizers.OptimizerAArch64 target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"aarch64-pc-windows-msvc", host): - args = ["-fms-runtime-lib=dll", "-fplt"] + host = "aarch64-pc-windows-msvc" condition = "defined(_M_ARM64)" + args = ["-fms-runtime-lib=dll", "-fplt"] optimizer = _optimizers.OptimizerAArch64 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): + host = "aarch64-unknown-linux-gnu" + condition = "defined(__aarch64__) && defined(__linux__)" # -mno-outline-atomics: Keep intrinsics from being emitted. args = ["-fpic", "-mno-outline-atomics"] - condition = "defined(__aarch64__) && defined(__linux__)" optimizer = _optimizers.OptimizerAArch64 target = _ELF(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"i686-pc-windows-msvc", host): + host = "i686-pc-windows-msvc" + condition = "defined(_M_IX86)" # -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here. 
args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"] optimizer = _optimizers.OptimizerX86 - condition = "defined(_M_IX86)" target = _COFF32(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-apple-darwin.*", host): + host = "x86_64-apple-darwin" condition = "defined(__x86_64__) && defined(__APPLE__)" optimizer = _optimizers.OptimizerX86 target = _MachO(host, condition, optimizer=optimizer) elif re.fullmatch(r"x86_64-pc-windows-msvc", host): - args = ["-fms-runtime-lib=dll"] + host = "x86_64-pc-windows-msvc" condition = "defined(_M_X64)" + args = ["-fms-runtime-lib=dll"] optimizer = _optimizers.OptimizerX86 target = _COFF64(host, condition, args=args, optimizer=optimizer) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): - args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] + host = "x86_64-unknown-linux-gnu" condition = "defined(__x86_64__) && defined(__linux__)" + args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] optimizer = _optimizers.OptimizerX86 target = _ELF(host, condition, args=args, optimizer=optimizer) else: diff --git a/Tools/jit/build.py b/Tools/jit/build.py index a0733005929bf2..b0122850e2de3a 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -22,6 +22,12 @@ parser.add_argument( "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" ) + parser.add_argument( + "-i", + "--input-file", + help="where to find the generated executor cases", + type=lambda p: pathlib.Path(p).resolve(), + ) parser.add_argument( "-o", "--output-dir", @@ -48,6 +54,8 @@ target.force = args.force target.verbose = args.verbose target.cflags = args.cflags + if args.input_file is not None: + target.input_file = args.input_file target.pyconfig_dir = args.pyconfig_dir target.build( comment=comment, diff --git a/Tools/jit/test/test_executor_cases.c.h b/Tools/jit/test/test_executor_cases.c.h new file mode 100644 index 00000000000000..496380cfc081a5 --- /dev/null +++ 
b/Tools/jit/test/test_executor_cases.c.h @@ -0,0 +1,29 @@ + case 0: { + // Zero-length jumps should be removed: + break; + } + + case 1: { + // -Os duplicates less code than -O3: + PyAPI_DATA(bool) sausage; + PyAPI_DATA(bool) spammed; + PyAPI_FUNC(void) order_eggs_and_bacon(void); + PyAPI_FUNC(void) order_eggs_sausage_and_bacon(void); + if (!sausage) { + order_eggs_and_bacon(); + } + else { + order_eggs_sausage_and_bacon(); + } + spammed = false; + break; + } + + case 2: { + // The assembly optimizer inverts hot branches: + PyAPI_DATA(bool) spam; + if (spam) { + JUMP_TO_ERROR(); + } + break; + } diff --git a/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h new file mode 100644 index 00000000000000..c26310b9ab4cc4 --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-aarch64-apple-darwin.h @@ -0,0 +1,149 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! 
+ // 4: 6d0133ed stp d13, d12, [sp, #0x10] + // 8: 6d022beb stp d11, d10, [sp, #0x20] + // c: 6d0323e9 stp d9, d8, [sp, #0x30] + // 10: a9046ffc stp x28, x27, [sp, #0x40] + // 14: a90567fa stp x26, x25, [sp, #0x50] + // 18: a9065ff8 stp x24, x23, [sp, #0x60] + // 1c: a90757f6 stp x22, x21, [sp, #0x70] + // 20: a9084ff4 stp x20, x19, [sp, #0x80] + // 24: a9097bfd stp x29, x30, [sp, #0x90] + // 28: 910243fd add x29, sp, #0x90 + // 2c: aa0003f4 mov x20, x0 + // 30: aa0103f5 mov x21, x1 + // 34: aa0203f6 mov x22, x2 + // 38: 9400000c bl 0x68 + // 3c: a9497bfd ldp x29, x30, [sp, #0x90] + // 40: a9484ff4 ldp x20, x19, [sp, #0x80] + // 44: a94757f6 ldp x22, x21, [sp, #0x70] + // 48: a9465ff8 ldp x24, x23, [sp, #0x60] + // 4c: a94567fa ldp x26, x25, [sp, #0x50] + // 50: a9446ffc ldp x28, x27, [sp, #0x40] + // 54: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 58: 6d422beb ldp d11, d10, [sp, #0x20] + // 5c: 6d4133ed ldp d13, d12, [sp, #0x10] + // 60: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 64: d65f03c0 ret + const unsigned char code_body[104] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xed, 0x33, 0x01, 0x6d, + 0xeb, 0x2b, 0x02, 0x6d, 0xe9, 0x23, 0x03, 0x6d, + 0xfc, 0x6f, 0x04, 0xa9, 0xfa, 0x67, 0x05, 0xa9, + 0xf8, 0x5f, 0x06, 0xa9, 0xf6, 0x57, 0x07, 0xa9, + 0xf4, 0x4f, 0x08, 0xa9, 0xfd, 0x7b, 0x09, 0xa9, + 0xfd, 0x43, 0x02, 0x91, 0xf4, 0x03, 0x00, 0xaa, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0x0c, 0x00, 0x00, 0x94, 0xfd, 0x7b, 0x49, 0xa9, + 0xf4, 0x4f, 0x48, 0xa9, 0xf6, 0x57, 0x47, 0xa9, + 0xf8, 0x5f, 0x46, 0xa9, 0xfa, 0x67, 0x45, 0xa9, + 0xfc, 0x6f, 0x44, 0xa9, 0xe9, 0x23, 0x43, 0x6d, + 0xeb, 0x2b, 0x42, 0x6d, 0xed, 0x33, 0x41, 0x6d, + 0xef, 0x3b, 0xca, 0x6c, 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + 
const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! + // 4: 910003fd mov x29, sp + // 8: 90000008 adrp x8, 0x0 + // 0000000000000008: ARM64_RELOC_GOT_LOAD_PAGE21 _sausage + // c: f9400108 ldr x8, [x8] + // 000000000000000c: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _sausage + // 10: 39400108 ldrb w8, [x8] + // 14: 36000068 tbz w8, #0x0, 0x20 + // 18: 94000000 bl 0x18 + // 0000000000000018: ARM64_RELOC_BRANCH26 _order_eggs_sausage_and_bacon + // 1c: 14000002 b 0x24 + // 20: 94000000 bl 0x20 + // 0000000000000020: ARM64_RELOC_BRANCH26 _order_eggs_and_bacon + // 24: 90000008 adrp x8, 0x0 + // 0000000000000024: ARM64_RELOC_GOT_LOAD_PAGE21 _spammed + // 28: f9400108 ldr x8, [x8] + // 0000000000000028: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _spammed + // 2c: 3900011f strb wzr, [x8] + // 30: a8c17bfd ldp x29, x30, [sp], #0x10 + const unsigned char code_body[52] = { + 0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x68, 0x00, 0x00, 0x36, + 0x00, 0x00, 0x00, 0x94, 0x02, 0x00, 0x00, 0x14, + 0x00, 0x00, 0x00, 0x94, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x1f, 0x01, 0x00, 0x39, + 0xfd, 0x7b, 0xc1, 0xa8, + }; + // 0: &spammed+0x0 + // 8: &sausage+0x0 + patch_64(data + 0x0, (uintptr_t)&spammed); + patch_64(data + 0x8, (uintptr_t)&sausage); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x8, (uintptr_t)data + 0x8); + patch_aarch64_trampoline(code + 0x18, 0x1, state); + patch_aarch64_trampoline(code + 0x20, 0x0, state); + patch_aarch64_33rx(code + 0x24, (uintptr_t)data); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 90000008 adrp x8, 0x0 + // 0000000000000000: ARM64_RELOC_GOT_LOAD_PAGE21 _spam + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: ARM64_RELOC_GOT_LOAD_PAGEOFF12 _spam + // 8: 39400108 ldrb w8, [x8] + // c: 
7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 + // 14: 14000000 b 0x14 + // 0000000000000014: ARM64_RELOC_BRANCH26 __JIT_ERROR_TARGET + const unsigned char code_body[24] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 104, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 52, 16, {0x03}}, + [2] = {emit_2, 24, 8, {0}}, +}; + +static const void * const symbols_map[2] = { + [0] = &order_eggs_and_bacon, + [1] = &order_eggs_sausage_and_bacon, +}; diff --git a/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h new file mode 100644 index 00000000000000..b8a3afbaee133e --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-aarch64-pc-windows-msvc.h @@ -0,0 +1,156 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! 
+ // 4: a90857f6 stp x22, x21, [sp, #0x80] + // 8: aa0103f5 mov x21, x1 + // c: aa0203f6 mov x22, x2 + // 10: a9094ff4 stp x20, x19, [sp, #0x90] + // 14: aa0003f4 mov x20, x0 + // 18: 6d0133ed stp d13, d12, [sp, #0x10] + // 1c: 6d022beb stp d11, d10, [sp, #0x20] + // 20: 6d0323e9 stp d9, d8, [sp, #0x30] + // 24: f90023fe str x30, [sp, #0x40] + // 28: a9056ffc stp x28, x27, [sp, #0x50] + // 2c: a90667fa stp x26, x25, [sp, #0x60] + // 30: a9075ff8 stp x24, x23, [sp, #0x70] + // 34: 9400000c bl 0x64 <_JIT_ENTRY+0x64> + // 38: a9494ff4 ldp x20, x19, [sp, #0x90] + // 3c: f94023fe ldr x30, [sp, #0x40] + // 40: a94857f6 ldp x22, x21, [sp, #0x80] + // 44: a9475ff8 ldp x24, x23, [sp, #0x70] + // 48: a94667fa ldp x26, x25, [sp, #0x60] + // 4c: a9456ffc ldp x28, x27, [sp, #0x50] + // 50: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 54: 6d422beb ldp d11, d10, [sp, #0x20] + // 58: 6d4133ed ldp d13, d12, [sp, #0x10] + // 5c: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 60: d65f03c0 ret + const unsigned char code_body[100] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xf6, 0x57, 0x08, 0xa9, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0xf4, 0x4f, 0x09, 0xa9, 0xf4, 0x03, 0x00, 0xaa, + 0xed, 0x33, 0x01, 0x6d, 0xeb, 0x2b, 0x02, 0x6d, + 0xe9, 0x23, 0x03, 0x6d, 0xfe, 0x23, 0x00, 0xf9, + 0xfc, 0x6f, 0x05, 0xa9, 0xfa, 0x67, 0x06, 0xa9, + 0xf8, 0x5f, 0x07, 0xa9, 0x0c, 0x00, 0x00, 0x94, + 0xf4, 0x4f, 0x49, 0xa9, 0xfe, 0x23, 0x40, 0xf9, + 0xf6, 0x57, 0x48, 0xa9, 0xf8, 0x5f, 0x47, 0xa9, + 0xfa, 0x67, 0x46, 0xa9, 0xfc, 0x6f, 0x45, 0xa9, + 0xe9, 0x23, 0x43, 0x6d, 0xeb, 0x2b, 0x42, 0x6d, + 0xed, 0x33, 0x41, 0x6d, 0xef, 0x3b, 0xca, 0x6c, + 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) 
+{ + // 0: f81f0ffe str x30, [sp, #-0x10]! + // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000004: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_sausage + // 8: f9400108 ldr x8, [x8] + // 0000000000000008: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_sausage + // c: 39400108 ldrb w8, [x8] + // 10: 36000088 tbz w8, #0x0, 0x20 <_JIT_ENTRY+0x20> + // 14: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000014: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_order_eggs_sausage_and_bacon + // 18: f9400108 ldr x8, [x8] + // 0000000000000018: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_order_eggs_sausage_and_bacon + // 1c: 14000003 b 0x28 <_JIT_ENTRY+0x28> + // 20: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000020: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_order_eggs_and_bacon + // 24: f9400108 ldr x8, [x8] + // 0000000000000024: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_order_eggs_and_bacon + // 28: d63f0100 blr x8 + // 2c: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 000000000000002c: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spammed + // 30: f9400108 ldr x8, [x8] + // 0000000000000030: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_spammed + // 34: 3900011f strb wzr, [x8] + // 38: f84107fe ldr x30, [sp], #0x10 + const unsigned char code_body[60] = { + 0xfe, 0x0f, 0x1f, 0xf8, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x08, 0x01, 0x40, 0x39, + 0x88, 0x00, 0x00, 0x36, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x03, 0x00, 0x00, 0x14, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x00, 0x01, 0x3f, 0xd6, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x1f, 0x01, 0x00, 0x39, + 0xfe, 0x07, 0x41, 0xf8, + }; + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); + memcpy(code, code_body, sizeof(code_body)); + 
patch_aarch64_33rx(code + 0x4, (uintptr_t)data); + patch_aarch64_33rx(code + 0x14, (uintptr_t)data + 0x8); + patch_aarch64_33rx(code + 0x20, (uintptr_t)data + 0x10); + patch_aarch64_33rx(code + 0x2c, (uintptr_t)data + 0x18); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_spam + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 <_JIT_ENTRY+0x18> + // 14: 14000000 b 0x14 <_JIT_ENTRY+0x14> + // 0000000000000014: IMAGE_REL_ARM64_BRANCH26 _JIT_ERROR_TARGET + const unsigned char code_body[24] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 100, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 60, 32, {0}}, + [2] = {emit_2, 24, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h new file mode 
100644 index 00000000000000..5e2ed0db8cc2d5 --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-aarch64-unknown-linux-gnu.h @@ -0,0 +1,159 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 6db63bef stp d15, d14, [sp, #-0xa0]! + // 4: a90857f6 stp x22, x21, [sp, #0x80] + // 8: aa0103f5 mov x21, x1 + // c: aa0203f6 mov x22, x2 + // 10: a9094ff4 stp x20, x19, [sp, #0x90] + // 14: aa0003f4 mov x20, x0 + // 18: 6d0133ed stp d13, d12, [sp, #0x10] + // 1c: 6d022beb stp d11, d10, [sp, #0x20] + // 20: 6d0323e9 stp d9, d8, [sp, #0x30] + // 24: a9047bfd stp x29, x30, [sp, #0x40] + // 28: 910103fd add x29, sp, #0x40 + // 2c: a9056ffc stp x28, x27, [sp, #0x50] + // 30: a90667fa stp x26, x25, [sp, #0x60] + // 34: a9075ff8 stp x24, x23, [sp, #0x70] + // 38: 9400000c bl 0x68 <_JIT_ENTRY+0x68> + // 3c: a9494ff4 ldp x20, x19, [sp, #0x90] + // 40: a94857f6 ldp x22, x21, [sp, #0x80] + // 44: a9475ff8 ldp x24, x23, [sp, #0x70] + // 48: a94667fa ldp x26, x25, [sp, #0x60] + // 4c: a9456ffc ldp x28, x27, [sp, #0x50] + // 50: a9447bfd ldp x29, x30, [sp, #0x40] + // 54: 6d4323e9 ldp d9, d8, [sp, #0x30] + // 58: 6d422beb ldp d11, d10, [sp, #0x20] + // 5c: 6d4133ed ldp d13, d12, [sp, #0x10] + // 60: 6cca3bef ldp d15, d14, [sp], #0xa0 + // 64: d65f03c0 ret + const unsigned char code_body[104] = { + 0xef, 0x3b, 0xb6, 0x6d, 0xf6, 0x57, 0x08, 0xa9, + 0xf5, 0x03, 0x01, 0xaa, 0xf6, 0x03, 0x02, 0xaa, + 0xf4, 0x4f, 0x09, 0xa9, 0xf4, 0x03, 0x00, 0xaa, + 0xed, 0x33, 0x01, 0x6d, 0xeb, 0x2b, 0x02, 0x6d, + 0xe9, 0x23, 0x03, 0x6d, 0xfd, 0x7b, 0x04, 0xa9, + 0xfd, 0x03, 0x01, 0x91, 0xfc, 0x6f, 0x05, 0xa9, + 0xfa, 0x67, 0x06, 0xa9, 0xf8, 0x5f, 0x07, 0xa9, + 0x0c, 0x00, 0x00, 0x94, 0xf4, 0x4f, 0x49, 0xa9, + 0xf6, 0x57, 0x48, 0xa9, 0xf8, 0x5f, 0x47, 0xa9, + 0xfa, 0x67, 0x46, 0xa9, 0xfc, 0x6f, 0x45, 0xa9, + 0xfd, 0x7b, 0x44, 0xa9, 0xe9, 0x23, 0x43, 0x6d, + 0xeb, 0x2b, 0x42, 0x6d, 0xed, 0x33, 0x41, 
0x6d, + 0xef, 0x3b, 0xca, 0x6c, 0xc0, 0x03, 0x5f, 0xd6, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: a9bf7bfd stp x29, x30, [sp, #-0x10]! + // 4: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000004: R_AARCH64_ADR_GOT_PAGE sausage + // 8: 910003fd mov x29, sp + // c: f9400108 ldr x8, [x8] + // 000000000000000c: R_AARCH64_LD64_GOT_LO12_NC sausage + // 10: 39400108 ldrb w8, [x8] + // 14: 36000088 tbz w8, #0x0, 0x24 <_JIT_ENTRY+0x24> + // 18: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000018: R_AARCH64_ADR_GOT_PAGE order_eggs_sausage_and_bacon + // 1c: f9400108 ldr x8, [x8] + // 000000000000001c: R_AARCH64_LD64_GOT_LO12_NC order_eggs_sausage_and_bacon + // 20: 14000003 b 0x2c <_JIT_ENTRY+0x2c> + // 24: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000024: R_AARCH64_ADR_GOT_PAGE order_eggs_and_bacon + // 28: f9400108 ldr x8, [x8] + // 0000000000000028: R_AARCH64_LD64_GOT_LO12_NC order_eggs_and_bacon + // 2c: d63f0100 blr x8 + // 30: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000030: R_AARCH64_ADR_GOT_PAGE spammed + // 34: f9400108 ldr x8, [x8] + // 0000000000000034: R_AARCH64_LD64_GOT_LO12_NC spammed + // 38: 3900011f strb wzr, [x8] + // 3c: a8c17bfd ldp x29, x30, [sp], #0x10 + const unsigned char code_body[64] = { + 0xfd, 0x7b, 0xbf, 0xa9, 0x08, 0x00, 0x00, 0x90, + 0xfd, 0x03, 0x00, 0x91, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x88, 0x00, 0x00, 0x36, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x03, 0x00, 0x00, 0x14, 0x08, 0x00, 0x00, 0x90, + 0x08, 0x01, 0x40, 0xf9, 0x00, 0x01, 0x3f, 0xd6, + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x1f, 0x01, 0x00, 0x39, 0xfd, 0x7b, 0xc1, 0xa8, + }; + // 0: 
&sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_21rx(code + 0x4, (uintptr_t)data); + patch_aarch64_12x(code + 0xc, (uintptr_t)data); + patch_aarch64_33rx(code + 0x18, (uintptr_t)data + 0x8); + patch_aarch64_33rx(code + 0x24, (uintptr_t)data + 0x10); + patch_aarch64_33rx(code + 0x30, (uintptr_t)data + 0x18); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 90000008 adrp x8, 0x0 <_JIT_ENTRY> + // 0000000000000000: R_AARCH64_ADR_GOT_PAGE spam + // 4: f9400108 ldr x8, [x8] + // 0000000000000004: R_AARCH64_LD64_GOT_LO12_NC spam + // 8: 39400108 ldrb w8, [x8] + // c: 7100051f cmp w8, #0x1 + // 10: 54000041 b.ne 0x18 <_JIT_ENTRY+0x18> + // 14: 14000000 b 0x14 <_JIT_ENTRY+0x14> + // 0000000000000014: R_AARCH64_JUMP26 _JIT_ERROR_TARGET + const unsigned char code_body[24] = { + 0x08, 0x00, 0x00, 0x90, 0x08, 0x01, 0x40, 0xf9, + 0x08, 0x01, 0x40, 0x39, 0x1f, 0x05, 0x00, 0x71, + 0x41, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x14, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_aarch64_33rx(code + 0x0, (uintptr_t)data); + patch_aarch64_26r(code + 0x14, state->instruction_starts[instruction->error_target]); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = 
{emit_shim, 104, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 64, 32, {0}}, + [2] = {emit_2, 24, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h new file mode 100644 index 00000000000000..2f8e7d768cd78f --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-i686-pc-windows-msvc.h @@ -0,0 +1,125 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 8b 44 24 0c movl 0xc(%esp), %eax + // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx + // 8: 8b 54 24 04 movl 0x4(%esp), %edx + // c: 89 54 24 04 movl %edx, 0x4(%esp) + // 10: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 14: 89 44 24 0c movl %eax, 0xc(%esp) + const unsigned char code_body[24] = { + 0x8b, 0x44, 0x24, 0x0c, 0x8b, 0x4c, 0x24, 0x08, + 0x8b, 0x54, 0x24, 0x04, 0x89, 0x54, 0x24, 0x04, + 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, 0x24, 0x0c, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 53 pushl %ebx + // 1: 57 pushl %edi + // 2: 56 pushl %esi + // 3: 8b 74 24 18 movl 0x18(%esp), %esi + // 7: 8b 7c 24 14 movl 0x14(%esp), %edi + // b: 8b 5c 24 10 movl 0x10(%esp), %ebx + // f: 80 3d 00 00 00 00 00 cmpb $0x0, 0x0 + // 00000011: IMAGE_REL_I386_DIR32 _sausage + // 16: 74 07 je 0x1f <__JIT_ENTRY+0x1f> + // 18: e8 00 00 00 00 calll 0x1d <__JIT_ENTRY+0x1d> + // 00000019: IMAGE_REL_I386_REL32 _order_eggs_sausage_and_bacon + // 1d: eb 05 jmp 0x24 <__JIT_ENTRY+0x24> + // 1f: e8 00 00 00 00 calll 0x24 
<__JIT_ENTRY+0x24> + // 00000020: IMAGE_REL_I386_REL32 _order_eggs_and_bacon + // 24: c6 05 00 00 00 00 00 movb $0x0, 0x0 + // 00000026: IMAGE_REL_I386_DIR32 _spammed + // 2b: 89 5c 24 10 movl %ebx, 0x10(%esp) + // 2f: 89 7c 24 14 movl %edi, 0x14(%esp) + // 33: 89 74 24 18 movl %esi, 0x18(%esp) + // 37: 5e popl %esi + // 38: 5f popl %edi + // 39: 5b popl %ebx + const unsigned char code_body[58] = { + 0x53, 0x57, 0x56, 0x8b, 0x74, 0x24, 0x18, 0x8b, + 0x7c, 0x24, 0x14, 0x8b, 0x5c, 0x24, 0x10, 0x80, + 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x07, + 0xe8, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x05, 0xe8, + 0x00, 0x00, 0x00, 0x00, 0xc6, 0x05, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x89, 0x5c, 0x24, 0x10, 0x89, + 0x7c, 0x24, 0x14, 0x89, 0x74, 0x24, 0x18, 0x5e, + 0x5f, 0x5b, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_32(code + 0x11, (uintptr_t)&sausage); + patch_x86_64_32rx(code + 0x19, (uintptr_t)&order_eggs_sausage_and_bacon + -0x4); + patch_x86_64_32rx(code + 0x20, (uintptr_t)&order_eggs_and_bacon + -0x4); + patch_32(code + 0x26, (uintptr_t)&spammed); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 8b 54 24 0c movl 0xc(%esp), %edx + // 4: 8b 4c 24 08 movl 0x8(%esp), %ecx + // 8: 8b 44 24 04 movl 0x4(%esp), %eax + // c: 80 3d 00 00 00 00 01 cmpb $0x1, 0x0 + // 0000000e: IMAGE_REL_I386_DIR32 _spam + // 13: 75 11 jne 0x26 <__JIT_ENTRY+0x26> + // 15: 89 54 24 0c movl %edx, 0xc(%esp) + // 19: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 1d: 89 44 24 04 movl %eax, 0x4(%esp) + // 21: e9 00 00 00 00 jmp 0x26 <__JIT_ENTRY+0x26> + // 00000022: IMAGE_REL_I386_REL32 __JIT_ERROR_TARGET + // 26: 89 54 24 0c movl %edx, 0xc(%esp) + // 2a: 89 4c 24 08 movl %ecx, 0x8(%esp) + // 2e: 89 44 24 04 movl %eax, 0x4(%esp) + const unsigned char code_body[50] = { + 0x8b, 0x54, 0x24, 0x0c, 0x8b, 0x4c, 0x24, 0x08, + 0x8b, 0x44, 0x24, 0x04, 0x80, 0x3d, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x75, 
0x11, 0x89, 0x54, 0x24, + 0x0c, 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, 0x24, + 0x04, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x89, 0x54, + 0x24, 0x0c, 0x89, 0x4c, 0x24, 0x08, 0x89, 0x44, + 0x24, 0x04, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_32(code + 0xe, (uintptr_t)&spam); + patch_x86_64_32rx(code + 0x22, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 0, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 24, 0, {0}}, + [1] = {emit_1, 58, 0, {0}}, + [2] = {emit_2, 50, 0, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h new file mode 100644 index 00000000000000..4d6ee50c4c1f41 --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-x86_64-apple-darwin.h @@ -0,0 +1,142 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 41 57 pushq %r15 + // 6: 41 56 pushq %r14 + // 8: 41 55 pushq %r13 + // a: 41 54 pushq %r12 + // c: 53 pushq %rbx + // d: 50 pushq %rax + // e: 49 89 fc movq %rdi, %r12 + // 11: 49 89 f5 movq %rsi, %r13 + // 14: 49 89 d6 movq %rdx, %r14 + // 17: e8 0f 00 00 00 callq 0x2b <__JIT_ENTRY+0x2b> + // 1c: 48 83 c4 08 addq $0x8, %rsp + // 20: 5b popq %rbx + // 21: 41 5c popq %r12 + // 23: 41 5d popq %r13 + // 25: 41 5e popq %r14 + // 27: 41 5f popq %r15 + // 29: 5d popq %rbp + // 2a: c3 retq + const unsigned char code_body[43] = 
{ + 0x55, 0x48, 0x89, 0xe5, 0x41, 0x57, 0x41, 0x56, + 0x41, 0x55, 0x41, 0x54, 0x53, 0x50, 0x49, 0x89, + 0xfc, 0x49, 0x89, 0xf5, 0x49, 0x89, 0xd6, 0xe8, + 0x0f, 0x00, 0x00, 0x00, 0x48, 0x83, 0xc4, 0x08, + 0x5b, 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, + 0x5f, 0x5d, 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 5d popq %rbp + const unsigned char code_body[5] = { + 0x55, 0x48, 0x89, 0xe5, 0x5d, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0xb <__JIT_ENTRY+0xb> + // 0000000000000007: X86_64_RELOC_GOT_LOAD _sausage@GOTPCREL + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <__JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) ## 0x16 <__JIT_ENTRY+0x16> + // 0000000000000012: X86_64_RELOC_GOT _order_eggs_sausage_and_bacon@GOTPCREL + // 16: eb 06 jmp 0x1e <__JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) ## 0x1e <__JIT_ENTRY+0x1e> + // 000000000000001a: X86_64_RELOC_GOT _order_eggs_and_bacon@GOTPCREL + // 1e: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0x25 <__JIT_ENTRY+0x25> + // 0000000000000021: X86_64_RELOC_GOT_LOAD _spammed@GOTPCREL + // 25: c6 00 00 movb $0x0, (%rax) + // 28: 5d popq %rbp + const unsigned char code_body[41] = { + 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, + 0x05, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, + 0x5d, + }; + // 0: &spammed+0x0 + // 8: &order_eggs_and_bacon+0x0 + // 10: 
&order_eggs_sausage_and_bacon+0x0 + // 18: &sausage+0x0 + patch_64(data + 0x0, (uintptr_t)&spammed); + patch_64(data + 0x8, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x18, (uintptr_t)&sausage); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + 0x14); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + 0xc); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0x4); + patch_x86_64_32rx(code + 0x21, (uintptr_t)data + -0x4); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 55 pushq %rbp + // 1: 48 89 e5 movq %rsp, %rbp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax ## 0xb <__JIT_ENTRY+0xb> + // 0000000000000007: X86_64_RELOC_GOT_LOAD _spam@GOTPCREL + // b: 80 38 01 cmpb $0x1, (%rax) + // e: 75 06 jne 0x16 <__JIT_ENTRY+0x16> + // 10: 5d popq %rbp + // 11: e9 00 00 00 00 jmp 0x16 <__JIT_ENTRY+0x16> + // 0000000000000012: X86_64_RELOC_BRANCH __JIT_ERROR_TARGET + // 16: 5d popq %rbp + const unsigned char code_body[23] = { + 0x55, 0x48, 0x89, 0xe5, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x01, 0x75, 0x06, + 0x5d, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x5d, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + -0x4); + patch_32r(code + 0x12, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 43, 0, {0}}; + +static const StencilGroup 
stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 5, 0, {0}}, + [1] = {emit_1, 41, 32, {0}}, + [2] = {emit_2, 23, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h new file mode 100644 index 00000000000000..356055584e4d61 --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-x86_64-pc-windows-msvc.h @@ -0,0 +1,166 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 41 57 pushq %r15 + // 2: 41 56 pushq %r14 + // 4: 41 55 pushq %r13 + // 6: 41 54 pushq %r12 + // 8: 56 pushq %rsi + // 9: 57 pushq %rdi + // a: 53 pushq %rbx + // b: 48 81 ec a0 00 00 00 subq $0xa0, %rsp + // 12: 44 0f 29 bc 24 90 00 00 00 movaps %xmm15, 0x90(%rsp) + // 1b: 44 0f 29 b4 24 80 00 00 00 movaps %xmm14, 0x80(%rsp) + // 24: 44 0f 29 6c 24 70 movaps %xmm13, 0x70(%rsp) + // 2a: 44 0f 29 64 24 60 movaps %xmm12, 0x60(%rsp) + // 30: 44 0f 29 5c 24 50 movaps %xmm11, 0x50(%rsp) + // 36: 44 0f 29 54 24 40 movaps %xmm10, 0x40(%rsp) + // 3c: 44 0f 29 4c 24 30 movaps %xmm9, 0x30(%rsp) + // 42: 44 0f 29 44 24 20 movaps %xmm8, 0x20(%rsp) + // 48: 0f 29 7c 24 10 movaps %xmm7, 0x10(%rsp) + // 4d: 0f 29 34 24 movaps %xmm6, (%rsp) + // 51: 49 89 cc movq %rcx, %r12 + // 54: 49 89 d5 movq %rdx, %r13 + // 57: 4d 89 c6 movq %r8, %r14 + // 5a: e8 52 00 00 00 callq 0xb1 <_JIT_ENTRY+0xb1> + // 5f: 0f 28 34 24 movaps (%rsp), %xmm6 + // 63: 0f 28 7c 24 10 movaps 0x10(%rsp), %xmm7 + // 68: 44 0f 28 44 24 20 movaps 0x20(%rsp), %xmm8 + // 6e: 44 0f 28 4c 24 30 movaps 0x30(%rsp), %xmm9 + // 74: 44 0f 28 54 24 40 movaps 0x40(%rsp), %xmm10 + // 7a: 44 0f 28 5c 24 50 movaps 0x50(%rsp), %xmm11 + // 80: 44 0f 28 64 24 60 movaps 0x60(%rsp), %xmm12 + // 86: 44 0f 28 6c 24 70 movaps 0x70(%rsp), %xmm13 + // 8c: 44 0f 28 b4 24 80 00 00 00 movaps 0x80(%rsp), %xmm14 + // 95: 44 
0f 28 bc 24 90 00 00 00 movaps 0x90(%rsp), %xmm15 + // 9e: 48 81 c4 a0 00 00 00 addq $0xa0, %rsp + // a5: 5b popq %rbx + // a6: 5f popq %rdi + // a7: 5e popq %rsi + // a8: 41 5c popq %r12 + // aa: 41 5d popq %r13 + // ac: 41 5e popq %r14 + // ae: 41 5f popq %r15 + // b0: c3 retq + const unsigned char code_body[177] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, + 0x56, 0x57, 0x53, 0x48, 0x81, 0xec, 0xa0, 0x00, + 0x00, 0x00, 0x44, 0x0f, 0x29, 0xbc, 0x24, 0x90, + 0x00, 0x00, 0x00, 0x44, 0x0f, 0x29, 0xb4, 0x24, + 0x80, 0x00, 0x00, 0x00, 0x44, 0x0f, 0x29, 0x6c, + 0x24, 0x70, 0x44, 0x0f, 0x29, 0x64, 0x24, 0x60, + 0x44, 0x0f, 0x29, 0x5c, 0x24, 0x50, 0x44, 0x0f, + 0x29, 0x54, 0x24, 0x40, 0x44, 0x0f, 0x29, 0x4c, + 0x24, 0x30, 0x44, 0x0f, 0x29, 0x44, 0x24, 0x20, + 0x0f, 0x29, 0x7c, 0x24, 0x10, 0x0f, 0x29, 0x34, + 0x24, 0x49, 0x89, 0xcc, 0x49, 0x89, 0xd5, 0x4d, + 0x89, 0xc6, 0xe8, 0x52, 0x00, 0x00, 0x00, 0x0f, + 0x28, 0x34, 0x24, 0x0f, 0x28, 0x7c, 0x24, 0x10, + 0x44, 0x0f, 0x28, 0x44, 0x24, 0x20, 0x44, 0x0f, + 0x28, 0x4c, 0x24, 0x30, 0x44, 0x0f, 0x28, 0x54, + 0x24, 0x40, 0x44, 0x0f, 0x28, 0x5c, 0x24, 0x50, + 0x44, 0x0f, 0x28, 0x64, 0x24, 0x60, 0x44, 0x0f, + 0x28, 0x6c, 0x24, 0x70, 0x44, 0x0f, 0x28, 0xb4, + 0x24, 0x80, 0x00, 0x00, 0x00, 0x44, 0x0f, 0x28, + 0xbc, 0x24, 0x90, 0x00, 0x00, 0x00, 0x48, 0x81, + 0xc4, 0xa0, 0x00, 0x00, 0x00, 0x5b, 0x5f, 0x5e, + 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 0x41, 0x5f, + 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 48 83 ec 28 subq $0x28, %rsp + // 4: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0xb <_JIT_ENTRY+0xb> + // 0000000000000007: IMAGE_REL_AMD64_REL32 __imp_sausage + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 
08 je 0x18 <_JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) # 0x16 <_JIT_ENTRY+0x16> + // 0000000000000012: IMAGE_REL_AMD64_REL32 __imp_order_eggs_sausage_and_bacon + // 16: eb 06 jmp 0x1e <_JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) # 0x1e <_JIT_ENTRY+0x1e> + // 000000000000001a: IMAGE_REL_AMD64_REL32 __imp_order_eggs_and_bacon + // 1e: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x25 <_JIT_ENTRY+0x25> + // 0000000000000021: IMAGE_REL_AMD64_REL32 __imp_spammed + // 25: c6 00 00 movb $0x0, (%rax) + // 28: 48 83 c4 28 addq $0x28, %rsp + const unsigned char code_body[44] = { + 0x48, 0x83, 0xec, 0x28, 0x48, 0x8b, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, + 0x05, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, + 0x48, 0x83, 0xc4, 0x28, + }; + // 0: &sausage+0x0 + // 8: &order_eggs_sausage_and_bacon+0x0 + // 10: &order_eggs_and_bacon+0x0 + // 18: &spammed+0x0 + patch_64(data + 0x0, (uintptr_t)&sausage); + patch_64(data + 0x8, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x10, (uintptr_t)&order_eggs_and_bacon); + patch_64(data + 0x18, (uintptr_t)&spammed); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x7, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + 0x4); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0xc); + patch_x86_64_32rx(code + 0x21, (uintptr_t)data + 0x14); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 48 8b 05 00 00 00 00 movq (%rip), %rax # 0x7 <_JIT_ENTRY+0x7> + // 0000000000000003: IMAGE_REL_AMD64_REL32 __imp_spam + // 7: 80 38 01 cmpb $0x1, (%rax) + // a: 0f 84 00 00 00 00 je 0x10 <_JIT_ENTRY+0x10> + // 000000000000000c: IMAGE_REL_AMD64_REL32 _JIT_ERROR_TARGET + const unsigned char code_body[16] = { + 0x48, 0x8b, 0x05, 0x00, 0x00, 
0x00, 0x00, 0x80, + 0x38, 0x01, 0x0f, 0x84, + }; + // 0: &spam+0x0 + patch_64(data + 0x0, (uintptr_t)&spam); + memcpy(code, code_body, sizeof(code_body)); + patch_x86_64_32rx(code + 0x3, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0xc, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 177, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 44, 32, {0}}, + [2] = {emit_2, 16, 8, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +}; diff --git a/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h new file mode 100644 index 00000000000000..075ecac8be499b --- /dev/null +++ b/Tools/jit/test/test_jit_stencils-x86_64-unknown-linux-gnu.h @@ -0,0 +1,116 @@ +void +emit_shim( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 41 57 pushq %r15 + // 2: 41 56 pushq %r14 + // 4: 41 55 pushq %r13 + // 6: 41 54 pushq %r12 + // 8: 53 pushq %rbx + // 9: 49 89 fc movq %rdi, %r12 + // c: 49 89 f5 movq %rsi, %r13 + // f: 49 89 d6 movq %rdx, %r14 + // 12: e8 0a 00 00 00 callq 0x21 <_JIT_ENTRY+0x21> + // 17: 5b popq %rbx + // 18: 41 5c popq %r12 + // 1a: 41 5d popq %r13 + // 1c: 41 5e popq %r14 + // 1e: 41 5f popq %r15 + // 20: c3 retq + const unsigned char code_body[33] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, + 0x53, 0x49, 0x89, 0xfc, 0x49, 0x89, 0xf5, 0x49, + 0x89, 0xd6, 0xe8, 0x0a, 0x00, 0x00, 0x00, 0x5b, + 0x41, 0x5c, 0x41, 0x5d, 0x41, 0x5e, 
0x41, 0x5f, + 0xc3, + }; + memcpy(code, code_body, sizeof(code_body)); +} + +void +emit_0( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ +} + +void +emit_1( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 50 pushq %rax + // 1: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000003: R_X86_64_64 sausage + // b: 80 38 00 cmpb $0x0, (%rax) + // e: 74 08 je 0x18 <_JIT_ENTRY+0x18> + // 10: ff 15 00 00 00 00 callq *(%rip) # 0x16 <_JIT_ENTRY+0x16> + // 0000000000000012: R_X86_64_GOTPCRELX order_eggs_sausage_and_bacon-0x4 + // 16: eb 06 jmp 0x1e <_JIT_ENTRY+0x1e> + // 18: ff 15 00 00 00 00 callq *(%rip) # 0x1e <_JIT_ENTRY+0x1e> + // 000000000000001a: R_X86_64_GOTPCRELX order_eggs_and_bacon-0x4 + // 1e: 48 b8 00 00 00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000020: R_X86_64_64 spammed + // 28: c6 00 00 movb $0x0, (%rax) + // 2b: 58 popq %rax + const unsigned char code_body[44] = { + 0x50, 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x38, 0x00, 0x74, 0x08, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0xeb, 0x06, + 0xff, 0x15, 0x00, 0x00, 0x00, 0x00, 0x48, 0xb8, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc6, 0x00, 0x00, 0x58, + }; + // 0: &order_eggs_sausage_and_bacon+0x0 + // 8: &order_eggs_and_bacon+0x0 + patch_64(data + 0x0, (uintptr_t)&order_eggs_sausage_and_bacon); + patch_64(data + 0x8, (uintptr_t)&order_eggs_and_bacon); + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x3, (uintptr_t)&sausage); + patch_x86_64_32rx(code + 0x12, (uintptr_t)data + -0x4); + patch_x86_64_32rx(code + 0x1a, (uintptr_t)data + 0x4); + patch_64(code + 0x20, (uintptr_t)&spammed); +} + +void +emit_2( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state) +{ + // 0: 48 b8 00 00 
00 00 00 00 00 00 movabsq $0x0, %rax + // 0000000000000002: R_X86_64_64 spam + // a: 80 38 01 cmpb $0x1, (%rax) + // d: 0f 84 00 00 00 00 je 0x13 <_JIT_ENTRY+0x13> + // 000000000000000f: R_X86_64_PLT32 _JIT_ERROR_TARGET-0x4 + const unsigned char code_body[19] = { + 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x38, 0x01, 0x0f, 0x84, + }; + memcpy(code, code_body, sizeof(code_body)); + patch_64(code + 0x2, (uintptr_t)&spam); + patch_32r(code + 0xf, state->instruction_starts[instruction->error_target] + -0x4); +} + +static_assert(SYMBOL_MASK_WORDS >= 1, "SYMBOL_MASK_WORDS too small"); + +typedef struct { + void (*emit)( + unsigned char *code, unsigned char *data, _PyExecutorObject *executor, + const _PyUOpInstruction *instruction, jit_state *state); + size_t code_size; + size_t data_size; + symbol_mask trampoline_mask; +} StencilGroup; + +static const StencilGroup shim = {emit_shim, 33, 0, {0}}; + +static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = { + [0] = {emit_0, 0, 0, {0}}, + [1] = {emit_1, 44, 16, {0}}, + [2] = {emit_2, 19, 0, {0}}, +}; + +static const void * const symbols_map[1] = { + 0 +};