Skip to content

Commit d715cf2

Browse files
Trampoline attempt
1 parent c78af6f commit d715cf2

File tree

2 files changed

+64
-5
lines changed

2 files changed

+64
-5
lines changed

Python/jit.c

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,7 @@ patch_32r(unsigned char *location, uint64_t value)
216216
value -= (uintptr_t)location;
217217
// Check that we're not out of range of 32 signed bits:
218218
assert((int64_t)value >= -(1LL << 31));
219-
// assert((int64_t)value < (1LL << 31));
220-
if ((int64_t)value >= (1LL << 31)) {
221-
__builtin_debugtrap();
222-
}
223-
219+
assert((int64_t)value < (1LL << 31));
224220
*loc32 = (uint32_t)value;
225221

226222
}
@@ -424,12 +420,17 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
424420
}
425421

426422
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
423+
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
427424

428425
#include "jit_stencils.h"
429426

430427
#if defined(__aarch64__) || defined(_M_ARM64)
431428
#define TRAMPOLINE_SIZE 16
432429
#define DATA_ALIGN 8
430+
#elif defined(__x86_64__) && defined(__APPLE__)
431+
// x86_64 trampoline layout: a 6-byte `jmp *0(%rip)` followed by the 8-byte absolute target address (14 bytes total)
432+
#define TRAMPOLINE_SIZE 16 // Round up to 16 for alignment
433+
#define DATA_ALIGN 16
433434
#else
434435
#define TRAMPOLINE_SIZE 0
435436
#define DATA_ALIGN 1
@@ -481,6 +482,47 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
481482
patch_aarch64_26r(location, (uintptr_t)p);
482483
}
483484

485+
// Generate and patch x86_64 trampolines.
486+
void
487+
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
488+
{
489+
uint64_t value = (uintptr_t)symbols_map[ordinal];
490+
int64_t range = (int64_t)value - 4 - (int64_t)location;
491+
492+
// If we are in range of 32 signed bits, patch directly
493+
if (range >= -(1LL << 31) && range < (1LL << 31)) {
494+
patch_32r(location, value - 4);
495+
return;
496+
}
497+
498+
// Out of range - need a trampoline
499+
const uint32_t symbol_mask = 1 << (ordinal % 32);
500+
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
501+
assert(symbol_mask & trampoline_mask);
502+
503+
// Count the number of set bits in the trampoline mask lower than ordinal
504+
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
505+
for (int i = 0; i < ordinal / 32; i++) {
506+
index += _Py_popcount32(state->trampolines.mask[i]);
507+
}
508+
509+
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
510+
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
511+
512+
/* Generate the trampoline (14 bytes, padded to 16):
513+
0: ff 25 00 00 00 00 jmp *(%rip) # Jump to address at offset 6
514+
6: XX XX XX XX XX XX XX XX .quad value (64-bit address)
515+
*/
516+
trampoline[0] = 0xFF; // jmp opcode
517+
trampoline[1] = 0x25; // ModRM byte for jmp *disp32(%rip)
518+
// Offset 0: the address is right after this instruction (at offset 6)
519+
*(uint32_t *)(trampoline + 2) = 0;
520+
*(uint64_t *)(trampoline + 6) = value;
521+
522+
// Patch the call site to call the trampoline instead
523+
patch_32r(location, (uintptr_t)trampoline - 4);
524+
}
525+
484526
static void
485527
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
486528
{

Tools/jit/_stencils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,23 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None:
239239
self._trampolines.add(ordinal)
240240
hole.addend = ordinal
241241
hole.symbol = None
242+
# x86_64 Darwin trampolines for external symbols
243+
elif (
244+
hole.kind == "X86_64_RELOC_BRANCH"
245+
and hole.value is HoleValue.ZERO
246+
and hole.symbol not in self.symbols
247+
):
248+
hole.func = "patch_x86_64_trampoline"
249+
hole.need_state = True
250+
assert hole.symbol is not None
251+
if hole.symbol in known_symbols:
252+
ordinal = known_symbols[hole.symbol]
253+
else:
254+
ordinal = len(known_symbols)
255+
known_symbols[hole.symbol] = ordinal
256+
self._trampolines.add(ordinal)
257+
hole.addend = ordinal
258+
hole.symbol = None
242259
self.data.pad(8)
243260
for stencil in [self.code, self.data]:
244261
for hole in stencil.holes:

0 commit comments

Comments
 (0)