9797 * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding] 
9898 * /tmp/jitted-PID-1.so:                                       [headers][.text][unwind_info][padding] 
9999 * 
100-  * The padding size (0x100)  is chosen to accommodate typical unwind info sizes  
101-  * while maintaining 16-byte alignment  requirements. 
100+  * The padding size is now calculated automatically during initialization  
101+  * based on the actual unwind information  requirements. 
102102 */ 
103- #define  PERF_JIT_CODE_PADDING  0x100
104103
105104/* Convenient access to the global trampoline API state */ 
106105#define  trampoline_api  _PyRuntime.ceval.perf.trampoline_api
@@ -401,10 +400,12 @@ enum {
401400    DWRF_CFA_nop  =  0x0 ,                    // No operation 
402401    DWRF_CFA_offset_extended  =  0x5 ,        // Extended offset instruction 
403402    DWRF_CFA_def_cfa  =  0xc ,               // Define CFA rule 
403+     DWRF_CFA_def_cfa_register  =  0xd ,      // Define CFA register 
404404    DWRF_CFA_def_cfa_offset  =  0xe ,        // Define CFA offset 
405405    DWRF_CFA_offset_extended_sf  =  0x11 ,   // Extended signed offset 
406406    DWRF_CFA_advance_loc  =  0x40 ,          // Advance location counter 
407-     DWRF_CFA_offset  =  0x80                 // Simple offset instruction 
407+     DWRF_CFA_offset  =  0x80 ,               // Simple offset instruction 
408+     DWRF_CFA_restore  =  0xc0                // Restore register 
408409};
409410
410411/* DWARF Exception Handling pointer encodings */ 
@@ -519,6 +520,7 @@ typedef struct ELFObjectContext {
519520    uint8_t *  p ;            // Current write position in buffer 
520521    uint8_t *  startp ;       // Start of buffer (for offset calculations) 
521522    uint8_t *  eh_frame_p ;   // Start of EH frame data (for relative offsets) 
523+     uint8_t *  fde_p ;        // Start of FDE data (for PC-relative calculations) 
522524    uint32_t  code_size ;    // Size of the code being described 
523525} ELFObjectContext ;
524526
@@ -643,6 +645,8 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
643645//                              DWARF EH FRAME GENERATION 
644646// ============================================================================= 
645647
648+ static  void  elf_init_ehframe (ELFObjectContext *  ctx );
649+ 
646650/* 
647651 * Initialize DWARF .eh_frame section for a code region 
648652 * 
@@ -657,6 +661,23 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
657661 * Args: 
658662 *   ctx: ELF object context containing code size and buffer pointers 
659663 */ 
664+ static  size_t  calculate_eh_frame_size (void ) {
665+     /* Calculate the EH frame size for the trampoline function */ 
666+     extern  void  * _Py_trampoline_func_start ;
667+     extern  void  * _Py_trampoline_func_end ;
668+ 
669+     size_t  code_size  =  (char * )& _Py_trampoline_func_end  -  (char * )& _Py_trampoline_func_start ;
670+ 
671+     ELFObjectContext  ctx ;
672+     char  buffer [1024 ];  // Buffer for DWARF data (1KB should be sufficient) 
673+     ctx .code_size  =  code_size ;
674+     ctx .startp  =  ctx .p  =  (uint8_t * )buffer ;
675+     ctx .fde_p  =  NULL ;
676+ 
677+     elf_init_ehframe (& ctx );
678+     return  ctx .p  -  ctx .startp ;
679+ }
680+ 
660681static  void  elf_init_ehframe (ELFObjectContext *  ctx ) {
661682    uint8_t *  p  =  ctx -> p ;
662683    uint8_t *  framep  =  p ;  // Remember start of frame data 
@@ -784,7 +805,7 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
784805    * 
785806    *     DWRF_SECTION(FDE, 
786807    *         DWRF_U32((uint32_t)(p - framep));       // Offset to CIE (relative from here) 
787-     *         DWRF_U32(-0x30 );                         // Initial  PC-relative location of the code 
808+     *         DWRF_U32(pc_relative_offset );           // PC-relative location of the code (calculated dynamically)  
788809    *         DWRF_U32(ctx->code_size);               // Code range covered by this FDE 
789810    *         DWRF_U8(0);                             // Augmentation data length (none) 
790811    * 
@@ -830,19 +851,31 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
830851        DWRF_U32 (0 );                           // CIE ID (0 indicates this is a CIE) 
831852        DWRF_U8 (DWRF_CIE_VERSION );            // CIE version (1) 
832853        DWRF_STR ("zR" );                       // Augmentation string ("zR" = has LSDA) 
833-         DWRF_UV (1 );                           // Code alignment factor 
854+ #ifdef  __x86_64__ 
855+         DWRF_UV (1 );                           // Code alignment factor (x86_64: 1 byte) 
856+ #elif  defined(__aarch64__ ) &&  defined(__AARCH64EL__ ) &&  !defined(__ILP32__ )
857+         DWRF_UV (4 );                           // Code alignment factor (AArch64: 4 bytes per instruction) 
858+ #endif 
834859        DWRF_SV (- (int64_t )sizeof (uintptr_t )); // Data alignment factor (negative) 
835860        DWRF_U8 (DWRF_REG_RA );                 // Return address register number 
836861        DWRF_UV (1 );                           // Augmentation data length 
837862        DWRF_U8 (DWRF_EH_PE_pcrel  | DWRF_EH_PE_sdata4 ); // FDE pointer encoding 
838863
839864        /* Initial CFI instructions - describe default calling convention */ 
865+ #ifdef  __x86_64__ 
866+         /* x86_64 initial CFI state */ 
840867        DWRF_U8 (DWRF_CFA_def_cfa );            // Define CFA (Call Frame Address) 
841868        DWRF_UV (DWRF_REG_SP );                 // CFA = SP register 
842869        DWRF_UV (sizeof (uintptr_t ));           // CFA = SP + pointer_size 
843870        DWRF_U8 (DWRF_CFA_offset |DWRF_REG_RA ); // Return address is saved 
844871        DWRF_UV (1 );                           // At offset 1 from CFA 
845- 
872+ #elif  defined(__aarch64__ ) &&  defined(__AARCH64EL__ ) &&  !defined(__ILP32__ )
873+         /* AArch64 initial CFI state */ 
874+         DWRF_U8 (DWRF_CFA_def_cfa );            // Define CFA (Call Frame Address) 
875+         DWRF_UV (DWRF_REG_SP );                 // CFA = SP register 
876+         DWRF_UV (0 );                           // CFA = SP + 0 (AArch64 starts with offset 0) 
877+         // No initial register saves in AArch64 CIE 
878+ #endif 
846879        DWRF_ALIGNNOP (sizeof (uintptr_t ));     // Align to pointer boundary 
847880    )
848881
@@ -853,11 +886,15 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
853886     * 
854887     * The FDE describes unwinding information specific to this function. 
855888     * It references the CIE and provides function-specific CFI instructions. 
889+      * 
890+      * The PC-relative offset is calculated after the entire EH frame is built 
891+      * to ensure accurate positioning relative to the synthesized DSO layout. 
856892     */ 
857893    DWRF_SECTION (FDE ,
858894        DWRF_U32 ((uint32_t )(p  -  framep ));     // Offset to CIE (backwards reference) 
859-         DWRF_U32 (-0x30 );                      // Machine code offset relative to .text 
860-         DWRF_U32 (ctx -> code_size );             // Address range covered by this FDE (code lenght) 
895+         ctx -> fde_p  =  p ;                        // Remember where PC offset field is located for later calculation 
896+         DWRF_U32 (0 );                           // Placeholder for PC-relative offset (calculated at end of elf_init_ehframe) 
897+         DWRF_U32 (ctx -> code_size );             // Address range covered by this FDE (code length) 
861898        DWRF_U8 (0 );                           // Augmentation data length (none) 
862899
863900        /* 
@@ -868,32 +905,36 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
868905         * conventions and register usage patterns. 
869906         */ 
870907#ifdef  __x86_64__ 
871-         /* x86_64 calling convention unwinding rules */ 
908+         /* x86_64 calling convention unwinding rules with frame pointer  */ 
872909#  if  defined(__CET__ ) &&  (__CET__  &  1 )
873-         DWRF_U8 (DWRF_CFA_advance_loc  | 8 );    // Advance location by 8 bytes when CET protection is enabled 
874- #  else 
875-         DWRF_U8 (DWRF_CFA_advance_loc  | 4 );    // Advance location by 4 bytes 
910+         DWRF_U8 (DWRF_CFA_advance_loc  | 4 );    // Advance past endbr64 (4 bytes) 
876911#  endif 
877-         DWRF_U8 (DWRF_CFA_def_cfa_offset );     // Redefine CFA offset 
912+         DWRF_U8 (DWRF_CFA_advance_loc  | 1 );    // Advance past push %rbp (1 byte) 
913+         DWRF_U8 (DWRF_CFA_def_cfa_offset );     // def_cfa_offset 16 
878914        DWRF_UV (16 );                          // New offset: SP + 16 
879-         DWRF_U8 (DWRF_CFA_advance_loc  | 6 );    // Advance location by 6 bytes 
880-         DWRF_U8 (DWRF_CFA_def_cfa_offset );     // Redefine CFA offset 
915+         DWRF_U8 (DWRF_CFA_offset  | DWRF_REG_BP ); // offset r6 at cfa-16 
916+         DWRF_UV (2 );                           // Offset factor: 2 * 8 = 16 bytes 
917+         DWRF_U8 (DWRF_CFA_advance_loc  | 3 );    // Advance past mov %rsp,%rbp (3 bytes) 
918+         DWRF_U8 (DWRF_CFA_def_cfa_register );   // def_cfa_register r6 
919+         DWRF_UV (DWRF_REG_BP );                 // Use base pointer register 
920+         DWRF_U8 (DWRF_CFA_advance_loc  | 3 );    // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3 
921+         DWRF_U8 (DWRF_CFA_def_cfa );            // def_cfa r7 ofs 8 
922+         DWRF_UV (DWRF_REG_SP );                 // Use stack pointer register 
881923        DWRF_UV (8 );                           // New offset: SP + 8 
882924#elif  defined(__aarch64__ ) &&  defined(__AARCH64EL__ ) &&  !defined(__ILP32__ )
883925        /* AArch64 calling convention unwinding rules */ 
884-         DWRF_U8 (DWRF_CFA_advance_loc  | 1 );        // Advance location by 1 instruction (stp x29, x30) 
885-         DWRF_U8 (DWRF_CFA_def_cfa_offset );         // Redefine CFA offset 
886-         DWRF_UV (16 );                              // CFA = SP + 16 (stack pointer after push) 
887-         DWRF_U8 (DWRF_CFA_offset  | DWRF_REG_FP );   // Frame pointer (x29) saved 
888-         DWRF_UV (2 );                               // At offset 2 from CFA (2 * 8 = 16 bytes) 
889-         DWRF_U8 (DWRF_CFA_offset  | DWRF_REG_RA );   // Link register (x30) saved 
890-         DWRF_UV (1 );                               // At offset 1 from CFA (1 * 8 = 8 bytes) 
891-         DWRF_U8 (DWRF_CFA_advance_loc  | 3 );        // Advance by 3 instructions (mov x16, x3; mov x29, sp; ldp...) 
892-         DWRF_U8 (DWRF_CFA_offset  | DWRF_REG_FP );   // Restore frame pointer (x29) 
893-         DWRF_U8 (DWRF_CFA_offset  | DWRF_REG_RA );   // Restore link register (x30) 
894-         DWRF_U8 (DWRF_CFA_def_cfa_offset );         // Final CFA adjustment 
895-         DWRF_UV (0 );                               // CFA = SP + 0 (stack restored) 
896- 
926+         DWRF_U8 (DWRF_CFA_advance_loc  | 1 );        // Advance by 1 instruction (4 bytes) 
927+         DWRF_U8 (DWRF_CFA_def_cfa_offset );         // CFA = SP + 16 
928+         DWRF_UV (16 );                              // Stack pointer moved by 16 bytes 
929+         DWRF_U8 (DWRF_CFA_offset  | DWRF_REG_FP );   // x29 (frame pointer) saved 
930+         DWRF_UV (2 );                               // At CFA-16 (2 * 8 = 16 bytes from CFA) 
931+         DWRF_U8 (DWRF_CFA_offset  | DWRF_REG_RA );   // x30 (link register) saved 
932+         DWRF_UV (1 );                               // At CFA-8 (1 * 8 = 8 bytes from CFA) 
933+         DWRF_U8 (DWRF_CFA_advance_loc  | 3 );        // Advance by 3 instructions (12 bytes) 
934+         DWRF_U8 (DWRF_CFA_restore  | DWRF_REG_RA );  // Restore x30 - NO DWRF_UV() after this! 
935+         DWRF_U8 (DWRF_CFA_restore  | DWRF_REG_FP );  // Restore x29 - NO DWRF_UV() after this! 
936+         DWRF_U8 (DWRF_CFA_def_cfa_offset );         // CFA = SP + 0 (stack restored) 
937+         DWRF_UV (0 );                               // Back to original stack position 
897938#else 
898939#    error  "Unsupported target architecture"
899940#endif 
@@ -902,6 +943,58 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
902943    )
903944
904945    ctx -> p  =  p ;  // Update context pointer to end of generated data 
946+ 
947+     /* Calculate and update the PC-relative offset in the FDE 
948+      * 
949+      * When perf processes the jitdump, it creates a synthesized DSO with this layout: 
950+      * 
951+      *     Synthesized DSO Memory Layout: 
952+      *     ┌─────────────────────────────────────────────────────────────┐ < code_start 
953+      *     │                        Code Section                         │ 
954+      *     │                    (round_up(code_size, 8) bytes)           │ 
955+      *     ├─────────────────────────────────────────────────────────────┤ < start of EH frame data 
956+      *     │                      EH Frame Data                          │ 
957+      *     │  ┌─────────────────────────────────────────────────────┐    │ 
958+      *     │  │                 CIE data                            │    │ 
959+      *     │  └─────────────────────────────────────────────────────┘    │ 
960+      *     │  ┌─────────────────────────────────────────────────────┐    │ 
961+      *     │  │ FDE Header:                                         │    │ 
962+      *     │  │   - CIE offset (4 bytes)                            │    │ 
963+      *     │  │   - PC offset (4 bytes) <─ fde_offset_in_frame ─────┼────┼─> points to code_start 
964+      *     │  │   - address range (4 bytes)                         │    │   (this specific field) 
965+      *     │  │ CFI Instructions...                                 │    │ 
966+      *     │  └─────────────────────────────────────────────────────┘    │ 
967+      *     ├─────────────────────────────────────────────────────────────┤ < reference_point 
968+      *     │                    EhFrameHeader                            │ 
969+      *     │                 (navigation metadata)                       │ 
970+      *     └─────────────────────────────────────────────────────────────┘ 
971+      * 
972+      * The PC offset field in the FDE must contain the distance from itself to code_start: 
973+      * 
974+      *   distance = code_start - fde_pc_field 
975+      * 
976+      * Where: 
977+      *   fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame 
978+      *   code_start_location = reference_point - eh_frame_size - round_up(code_size, 8) 
979+      * 
980+      * Therefore: 
981+      *   distance = code_start_location - fde_pc_field_location 
982+      *            = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame) 
983+      *            = -rounded_code_size - fde_offset_in_frame 
984+      *            = -(round_up(code_size, 8) + fde_offset_in_frame) 
985+      * 
986+      * Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field, i.e. ctx->fde_p - ctx->startp.
987+      * 
988+      */ 
989+     if  (ctx -> fde_p  !=  NULL ) {
990+         int32_t  fde_offset_in_frame  =  (ctx -> fde_p  -  ctx -> startp );
991+         int32_t  rounded_code_size  =  round_up (ctx -> code_size , 8 );
992+         int32_t  pc_relative_offset  =  - (rounded_code_size  +  fde_offset_in_frame );
993+ 
994+ 
995+         // Update the PC-relative offset in the FDE 
996+         * (int32_t * )ctx -> fde_p  =  pc_relative_offset ;
997+     }
905998}
906999
9071000// ============================================================================= 
@@ -1002,8 +1095,10 @@ static void* perf_map_jit_init(void) {
10021095    /* Initialize code ID counter */ 
10031096    perf_jit_map_state .code_id  =  0 ;
10041097
1005-     /* Configure trampoline API with padding information */ 
1006-     trampoline_api .code_padding  =  PERF_JIT_CODE_PADDING ;
1098+     /* Calculate padding size based on actual unwind info requirements */ 
1099+     size_t  eh_frame_size  =  calculate_eh_frame_size ();
1100+     size_t  unwind_data_size  =  sizeof (EhFrameHeader ) +  eh_frame_size ;
1101+     trampoline_api .code_padding  =  round_up (unwind_data_size , 16 );
10071102
10081103    return  & perf_jit_map_state ;
10091104}
@@ -1092,6 +1187,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
10921187    char  buffer [1024 ];  // Buffer for DWARF data (1KB should be sufficient) 
10931188    ctx .code_size  =  code_size ;
10941189    ctx .startp  =  ctx .p  =  (uint8_t * )buffer ;
1190+     ctx .fde_p  =  NULL ;  // Initialize to NULL, will be set when FDE is written 
10951191
10961192    /* Generate EH frame (Exception Handling frame) data */ 
10971193    elf_init_ehframe (& ctx );
@@ -1110,7 +1206,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
11101206    ev2 .unwind_data_size  =  sizeof (EhFrameHeader ) +  eh_frame_size ;
11111207
11121208    /* Verify we don't exceed our padding budget */ 
1113-     assert (ev2 .unwind_data_size  <= PERF_JIT_CODE_PADDING );
1209+     assert (ev2 .unwind_data_size  <= ( uint64_t ) trampoline_api . code_padding );
11141210
11151211    ev2 .eh_frame_hdr_size  =  sizeof (EhFrameHeader );
11161212    ev2 .mapped_size  =  round_up (ev2 .unwind_data_size , 16 );  // 16-byte alignment 
@@ -1262,4 +1358,4 @@ _PyPerf_Callbacks _Py_perfmap_jit_callbacks = {
12621358    & perf_map_jit_fini ,        // Cleanup function 
12631359};
12641360
1265- #endif  /* PY_HAVE_PERF_TRAMPOLINE */ 
1361+ #endif  /* PY_HAVE_PERF_TRAMPOLINE */ 
0 commit comments