Skip to content

Commit 8b228c0

Browse files
committed
gh-136459: Use frame pointers in the x86_64 perf trampolines
1 parent c176543 commit 8b228c0

File tree

2 files changed

+82
-16
lines changed

2 files changed

+82
-16
lines changed

Python/asm_trampoline.S

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ _Py_trampoline_func_start:
1212
#if defined(__CET__) && (__CET__ & 1)
1313
endbr64
1414
#endif
15-
sub $8, %rsp
16-
call *%rcx
17-
add $8, %rsp
15+
push %rbp
16+
mov %rsp, %rbp
17+
call *%rcx
18+
pop %rbp
1819
ret
1920
#endif // __x86_64__
2021
#if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)

Python/perf_jit_trampoline.c

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,12 @@ enum {
401401
DWRF_CFA_nop = 0x0, // No operation
402402
DWRF_CFA_offset_extended = 0x5, // Extended offset instruction
403403
DWRF_CFA_def_cfa = 0xc, // Define CFA rule
404+
DWRF_CFA_def_cfa_register = 0xd, // Define CFA register
404405
DWRF_CFA_def_cfa_offset = 0xe, // Define CFA offset
405406
DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset
406407
DWRF_CFA_advance_loc = 0x40, // Advance location counter
407-
DWRF_CFA_offset = 0x80 // Simple offset instruction
408+
DWRF_CFA_offset = 0x80, // Simple offset instruction
409+
DWRF_CFA_restore = 0xc0 // Restore register
408410
};
409411

410412
/* DWARF Exception Handling pointer encodings */
@@ -519,6 +521,7 @@ typedef struct ELFObjectContext {
519521
uint8_t* p; // Current write position in buffer
520522
uint8_t* startp; // Start of buffer (for offset calculations)
521523
uint8_t* eh_frame_p; // Start of EH frame data (for relative offsets)
524+
uint8_t* fde_p; // Location of the FDE's PC-relative offset field (patched once the EH frame is fully built)
522525
uint32_t code_size; // Size of the code being described
523526
} ELFObjectContext;
524527

@@ -784,7 +787,7 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
784787
*
785788
* DWRF_SECTION(FDE,
786789
* DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (relative from here)
787-
* DWRF_U32(-0x30); // Initial PC-relative location of the code
790+
* DWRF_U32(pc_relative_offset); // PC-relative location of the code (calculated dynamically)
788791
* DWRF_U32(ctx->code_size); // Code range covered by this FDE
789792
* DWRF_U8(0); // Augmentation data length (none)
790793
*
@@ -853,11 +856,15 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
853856
*
854857
* The FDE describes unwinding information specific to this function.
855858
* It references the CIE and provides function-specific CFI instructions.
859+
*
860+
* The PC-relative offset is calculated after the entire EH frame is built
861+
* to ensure accurate positioning relative to the synthesized DSO layout.
856862
*/
857863
DWRF_SECTION(FDE,
858864
DWRF_U32((uint32_t)(p - framep)); // Offset to CIE (backwards reference)
859-
DWRF_U32(-0x30); // Machine code offset relative to .text
860-
DWRF_U32(ctx->code_size); // Address range covered by this FDE (code lenght)
865+
ctx->fde_p = p; // Remember where PC offset field is located for later calculation
866+
DWRF_U32(0); // Placeholder for PC-relative offset (calculated at end of elf_init_ehframe)
867+
DWRF_U32(ctx->code_size); // Address range covered by this FDE (code length)
861868
DWRF_U8(0); // Augmentation data length (none)
862869

863870
/*
@@ -868,17 +875,22 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
868875
* conventions and register usage patterns.
869876
*/
870877
#ifdef __x86_64__
871-
/* x86_64 calling convention unwinding rules */
878+
/* x86_64 calling convention unwinding rules with frame pointer */
872879
# if defined(__CET__) && (__CET__ & 1)
873-
DWRF_U8(DWRF_CFA_advance_loc | 8); // Advance location by 8 bytes when CET protection is enabled
874-
# else
875-
DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance location by 4 bytes
880+
DWRF_U8(DWRF_CFA_advance_loc | 4); // Advance past endbr64 (4 bytes)
876881
# endif
877-
DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset
878-
DWRF_UV(16); // New offset: SP + 16
879-
DWRF_U8(DWRF_CFA_advance_loc | 6); // Advance location by 6 bytes
880-
DWRF_U8(DWRF_CFA_def_cfa_offset); // Redefine CFA offset
881-
DWRF_UV(8); // New offset: SP + 8
882+
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past push %rbp (1 byte)
883+
DWRF_U8(DWRF_CFA_def_cfa_offset); // def_cfa_offset 16
884+
DWRF_UV(16);
885+
DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP); // offset r6 at cfa-16
886+
DWRF_UV(2);
887+
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes)
888+
DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6
889+
DWRF_UV(DWRF_REG_BP);
890+
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3
891+
DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8
892+
DWRF_UV(DWRF_REG_SP);
893+
DWRF_UV(8);
882894
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
883895
/* AArch64 calling convention unwinding rules */
884896
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 instruction (stp x29, x30)
@@ -902,6 +914,58 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
902914
)
903915

904916
ctx->p = p; // Update context pointer to end of generated data
917+
918+
/* Calculate and update the PC-relative offset in the FDE
919+
*
920+
* When perf processes the jitdump, it creates a synthesized DSO with this layout:
921+
*
922+
* Synthesized DSO Memory Layout:
923+
* ┌─────────────────────────────────────────────────────────────┐ < code_start
924+
* │ Code Section │
925+
* │ (round_up(code_size, 8) bytes) │
926+
* ├─────────────────────────────────────────────────────────────┤ < start of EH frame data
927+
* │ EH Frame Data │
928+
* │ ┌─────────────────────────────────────────────────────┐ │
929+
* │ │ CIE data │ │
930+
* │ └─────────────────────────────────────────────────────┘ │
931+
* │ ┌─────────────────────────────────────────────────────┐ │
932+
* │ │ FDE Header: │ │
933+
* │ │ - CIE offset (4 bytes) │ │
934+
* │ │ - PC offset (4 bytes) <─ fde_offset_in_frame ─────┼────┼─> points to code_start
935+
* │ │ - address range (4 bytes) │ │ (this specific field)
936+
* │ │ CFI Instructions... │ │
937+
* │ └─────────────────────────────────────────────────────┘ │
938+
* ├─────────────────────────────────────────────────────────────┤ < reference_point
939+
* │ EhFrameHeader │
940+
* │ (navigation metadata) │
941+
* └─────────────────────────────────────────────────────────────┘
942+
*
943+
* The PC offset field in the FDE must contain the distance from itself to code_start:
944+
*
945+
* distance = code_start - fde_pc_field
946+
*
947+
* Where:
948+
* fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame
949+
* code_start_location = reference_point - eh_frame_size - round_up(code_size, 8)
950+
*
951+
* Therefore:
952+
* distance = code_start_location - fde_pc_field_location
953+
* = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame)
954+
* = -rounded_code_size - fde_offset_in_frame
955+
* = -(round_up(code_size, 8) + fde_offset_in_frame)
956+
*
957+
* Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field (computed below as ctx->fde_p - ctx->startp).
958+
*
959+
*/
960+
if (ctx->fde_p != NULL) {
961+
int32_t fde_offset_in_frame = (ctx->fde_p - ctx->startp);
962+
int32_t rounded_code_size = round_up(ctx->code_size, 8);
963+
int32_t pc_relative_offset = -(rounded_code_size + fde_offset_in_frame);
964+
965+
966+
// Update the PC-relative offset in the FDE
967+
*(int32_t*)ctx->fde_p = pc_relative_offset;
968+
}
905969
}
906970

907971
// =============================================================================
@@ -1092,6 +1156,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
10921156
char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient)
10931157
ctx.code_size = code_size;
10941158
ctx.startp = ctx.p = (uint8_t*)buffer;
1159+
ctx.fde_p = NULL; // Initialize to NULL, will be set when FDE is written
10951160

10961161
/* Generate EH frame (Exception Handling frame) data */
10971162
elf_init_ehframe(&ctx);

0 commit comments

Comments
 (0)