@@ -216,11 +216,7 @@ patch_32r(unsigned char *location, uint64_t value)
     value -= (uintptr_t)location;
     // Check that we're not out of range of 32 signed bits:
     assert((int64_t)value >= -(1LL << 31));
-    // assert((int64_t)value < (1LL << 31));
-    if ((int64_t)value >= (1LL << 31)) {
-        __builtin_debugtrap();
-    }
-
+    assert((int64_t)value < (1LL << 31));
     *loc32 = (uint32_t)value;
 
 }
@@ -424,12 +420,17 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
 }
 
 void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
+void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
 
 #include "jit_stencils.h"
 
 #if defined(__aarch64__) || defined(_M_ARM64)
     #define TRAMPOLINE_SIZE 16
     #define DATA_ALIGN 8
+#elif defined(__x86_64__) && defined(__APPLE__)
+    // x86_64 trampolines: jmp *(%rip); .quad address (6 bytes + 8 bytes = 14 bytes)
+    #define TRAMPOLINE_SIZE 16  // Round up to 16 for alignment
+    #define DATA_ALIGN 16
 #else
     #define TRAMPOLINE_SIZE 0
     #define DATA_ALIGN 1
@@ -481,6 +482,47 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
     patch_aarch64_26r(location, (uintptr_t)p);
 }
 
+// Generate and patch x86_64 trampolines.
+void
+patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
+{
+    uint64_t value = (uintptr_t)symbols_map[ordinal];
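+    // rel32 displacements are measured from the end of the 4-byte field;
+    // patch_32r() subtracts `location` itself, so subtract the remaining
+    // 4 bytes here and below.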
+    int64_t range = (int64_t)value - 4 - (int64_t)location;
+
+    // If we are in range of 32 signed bits, patch directly
+    if (range >= -(1LL << 31) && range < (1LL << 31)) {
+        patch_32r(location, value - 4);
+        return;
+    }
+
+    // Out of range - need a trampoline
+    const uint32_t symbol_mask = 1 << (ordinal % 32);
+    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
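+    // A trampoline slot must have been reserved for this ordinal ahead of
+    // time (its bit set in state->trampolines.mask).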
+    assert(symbol_mask & trampoline_mask);
+
+    // Count the number of set bits in the trampoline mask lower than ordinal
+    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
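+    // ...plus every trampoline reserved in the mask words below this one.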
+    for (int i = 0; i < ordinal / 32; i++) {
+        index += _Py_popcount32(state->trampolines.mask[i]);
+    }
+
+    unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
+    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
+
+    /* Generate the trampoline (14 bytes, padded to 16):
+       0: ff 25 00 00 00 00          jmp *(%rip)   # Jump to the address at offset 6
+       6: XX XX XX XX XX XX XX XX    .quad value   # 64-bit target address
+    */
+    trampoline[0] = 0xFF;  // jmp opcode
+    trampoline[1] = 0x25;  // ModRM byte for jmp *disp32(%rip)
+    // disp32 of 0: the target address is stored right after this instruction (at offset 6)
+    *(uint32_t *)(trampoline + 2) = 0;
+    *(uint64_t *)(trampoline + 6) = value;
+
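+    // The trampoline pool is carved out of the same allocation as the JIT
+    // code, so it should always be within rel32 range of the call site.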
+    // Patch the call site to call the trampoline instead
+    patch_32r(location, (uintptr_t)trampoline - 4);
+}
+
 static void
 combine_symbol_mask(const symbol_mask src, symbol_mask dest)
 {