99
1010#include " xenia/cpu/backend/x64/x64_code_cache.h"
1111
12+ #include < cstring>
13+ #include < vector>
14+
15+ #include " xenia/base/assert.h"
16+ #include " xenia/base/logging.h"
17+ #include " xenia/base/math.h"
18+ #include " xenia/cpu/backend/x64/x64_stack_layout.h"
19+
// libgcc/libunwind APIs for registering DWARF .eh_frame unwind info with the
// runtime unwinder. Declared manually here with C linkage; each takes the
// address that was (or will be) registered.
extern "C" void __register_frame(void*);
extern "C" void __deregister_frame(void*);
23+
1224namespace xe {
1325namespace cpu {
1426namespace backend {
1527namespace x64 {
1628
// Maximum size of DWARF .eh_frame data per function (CIE + FDE + terminator).
// InitializeUnwindEntry asserts that what it writes fits in this many bytes.
static constexpr uint32_t kMaxUnwindInfoSize = 96;

// DWARF register numbers for x86-64.
static constexpr uint8_t kDwarfRegRBX = 3;
static constexpr uint8_t kDwarfRegRBP = 6;
static constexpr uint8_t kDwarfRegRSP = 7;
static constexpr uint8_t kDwarfRegR12 = 12;
static constexpr uint8_t kDwarfRegR13 = 13;
static constexpr uint8_t kDwarfRegR14 = 14;
static constexpr uint8_t kDwarfRegR15 = 15;
// Return-address column.
static constexpr uint8_t kDwarfRegRA = 16;

// DWARF CFA opcodes.
static constexpr uint8_t kDW_CFA_advance_loc1 = 0x02;
static constexpr uint8_t kDW_CFA_advance_loc2 = 0x03;
static constexpr uint8_t kDW_CFA_def_cfa = 0x0c;
static constexpr uint8_t kDW_CFA_def_cfa_offset = 0x0e;
static constexpr uint8_t kDW_CFA_nop = 0x00;

// DWARF pointer encoding constants.
static constexpr uint8_t kDW_EH_PE_pcrel = 0x10;
static constexpr uint8_t kDW_EH_PE_sdata4 = 0x0b;
52+
// Encodes `value` as unsigned LEB128 into the buffer at `p`.
//
// Seven bits are emitted per byte, least-significant group first; the high
// bit is set on every byte except the last. At least one byte is always
// written (zero encodes as a single 0x00).
//
// Returns the number of bytes written. No bounds checking is performed: the
// caller must provide enough room (a 64-bit value needs at most 10 bytes).
static size_t WriteULEB128(uint8_t* p, uint64_t value) {
  size_t count = 0;
  do {
    uint8_t byte = value & 0x7F;
    value >>= 7;
    if (value) {
      // More groups follow: set the continuation bit.
      byte |= 0x80;
    }
    p[count++] = byte;
  } while (value);
  return count;
}
63+
// Encodes `value` as signed LEB128 into the buffer at `p`.
//
// Seven bits are emitted per byte, least-significant group first. Encoding
// stops once the remaining value is pure sign extension (0 or -1) and the
// sign bit (0x40) of the byte just produced agrees with it.
//
// Returns the number of bytes written. No bounds checking is performed: the
// caller must provide enough room (a 64-bit value needs at most 10 bytes).
static size_t WriteSLEB128(uint8_t* p, int64_t value) {
  size_t count = 0;
  bool more = true;
  while (more) {
    uint8_t byte = value & 0x7F;
    // Relies on >> of a negative value being an arithmetic shift.
    value >>= 7;
    const bool sign_bit_set = (byte & 0x40) != 0;
    if ((value == 0 && !sign_bit_set) || (value == -1 && sign_bit_set)) {
      more = false;
    } else {
      // More groups follow: set the continuation bit.
      byte |= 0x80;
    }
    p[count++] = byte;
  }
  return count;
}
79+
1780class PosixX64CodeCache : public X64CodeCache {
1881 public:
1982 PosixX64CodeCache ();
@@ -24,26 +87,240 @@ class PosixX64CodeCache : public X64CodeCache {
2487 void * LookupUnwindInfo (uint64_t host_pc) override { return nullptr ; }
2588
2689 private:
27- /*
2890 UnwindReservation RequestUnwindReservation (uint8_t * entry_address) override ;
29- void PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size,
30- size_t stack_size , void* code_execute_address,
91+ void PlaceCode (uint32_t guest_address, void * machine_code,
92+ const EmitFunctionInfo& func_info , void * code_execute_address,
3193 UnwindReservation unwind_reservation) override ;
3294
3395 void InitializeUnwindEntry (uint8_t * unwind_entry_address,
34- size_t unwind_table_slot, void* code_address,
35- size_t code_size, size_t stack_size);
36- */
96+ void * code_execute_address,
97+ const EmitFunctionInfo& func_info);
98+
99+ // Pointers registered with __register_frame, for cleanup.
100+ std::vector<void *> registered_frames_;
101+ // Current number of unwind table entries.
102+ uint32_t unwind_table_count_ = 0 ;
37103};
38104
39105std::unique_ptr<X64CodeCache> X64CodeCache::Create () {
40106 return std::make_unique<PosixX64CodeCache>();
41107}
42108
43109PosixX64CodeCache::PosixX64CodeCache () = default ;
44- PosixX64CodeCache::~PosixX64CodeCache () = default ;
45110
46- bool PosixX64CodeCache::Initialize () { return X64CodeCache::Initialize (); }
111+ PosixX64CodeCache::~PosixX64CodeCache () {
112+ for (auto frame : registered_frames_) {
113+ __deregister_frame (frame);
114+ }
115+ }
116+
117+ bool PosixX64CodeCache::Initialize () {
118+ if (!X64CodeCache::Initialize ()) {
119+ return false ;
120+ }
121+ registered_frames_.reserve (kMaximumFunctionCount );
122+ return true ;
123+ }
124+
125+ X64CodeCache::UnwindReservation PosixX64CodeCache::RequestUnwindReservation (
126+ uint8_t * entry_address) {
127+ #if defined(NDEBUG)
128+ if (unwind_table_count_ >= kMaximumFunctionCount ) {
129+ xe::FatalError (
130+ " Unwind table count exceeded maximum! Please report this to "
131+ " Xenia developers" );
132+ }
133+ #else
134+ assert_false (unwind_table_count_ >= kMaximumFunctionCount );
135+ #endif
136+ UnwindReservation unwind_reservation;
137+ unwind_reservation.data_size = xe::round_up (kMaxUnwindInfoSize , 16 );
138+ unwind_reservation.table_slot = unwind_table_count_++;
139+ unwind_reservation.entry_address = entry_address;
140+ return unwind_reservation;
141+ }
142+
143+ void PosixX64CodeCache::PlaceCode (uint32_t guest_address, void * machine_code,
144+ const EmitFunctionInfo& func_info,
145+ void * code_execute_address,
146+ UnwindReservation unwind_reservation) {
147+ // Write the DWARF .eh_frame data into the reserved unwind space.
148+ InitializeUnwindEntry (unwind_reservation.entry_address , code_execute_address,
149+ func_info);
150+
151+ // Register with the runtime unwinder using the execute-side address.
152+ // The execute mapping is readable (kExecuteReadOnly = PROT_EXEC|PROT_READ),
153+ // so the unwinder can read the .eh_frame data at runtime.
154+ void * unwind_execute_address = unwind_reservation.entry_address -
155+ generated_code_write_base_ +
156+ generated_code_execute_base_;
157+ __register_frame (unwind_execute_address);
158+ registered_frames_.push_back (unwind_execute_address);
159+ }
160+
161+ void PosixX64CodeCache::InitializeUnwindEntry (
162+ uint8_t * unwind_entry_address, void * code_execute_address,
163+ const EmitFunctionInfo& func_info) {
164+ // Compute execute-side base address of the unwind buffer.
165+ // We write via the write mapping but pc-relative offsets must be relative
166+ // to the execute mapping (which is what __register_frame sees).
167+ uint8_t * unwind_execute_base = unwind_entry_address -
168+ generated_code_write_base_ +
169+ generated_code_execute_base_;
170+
171+ uint8_t * p = unwind_entry_address;
172+ uint8_t * cie_start = p;
173+
174+ // === CIE (Common Information Entry) ===
175+ uint8_t * cie_length_ptr = p;
176+ p += 4 ; // placeholder for length
177+
178+ uint8_t * cie_content_start = p;
179+
180+ // CIE ID = 0 (distinguishes CIE from FDE in .eh_frame format).
181+ *reinterpret_cast <uint32_t *>(p) = 0 ;
182+ p += 4 ;
183+
184+ // Version = 1.
185+ *p++ = 1 ;
186+
187+ // Augmentation string "zR" - indicates augmentation data with FDE encoding.
188+ *p++ = ' z' ;
189+ *p++ = ' R' ;
190+ *p++ = ' \0 ' ;
191+
192+ // Code alignment factor = 1.
193+ p += WriteULEB128 (p, 1 );
194+
195+ // Data alignment factor = -8.
196+ p += WriteSLEB128 (p, -8 );
197+
198+ // Return address register column = 16 (x86-64 RA).
199+ p += WriteULEB128 (p, kDwarfRegRA );
200+
201+ // Augmentation data length = 1 (just the FDE encoding byte).
202+ p += WriteULEB128 (p, 1 );
203+
204+ // FDE pointer encoding: pc-relative, signed 32-bit.
205+ *p++ = kDW_EH_PE_pcrel | kDW_EH_PE_sdata4 ;
206+
207+ // Initial instructions:
208+ // DW_CFA_def_cfa RSP, 8 — at function entry, CFA = RSP + 8.
209+ *p++ = kDW_CFA_def_cfa ;
210+ p += WriteULEB128 (p, kDwarfRegRSP );
211+ p += WriteULEB128 (p, 8 );
212+
213+ // DW_CFA_offset RA, 1 — return address at CFA - 8 (factored: 1 * 8).
214+ *p++ = 0x80 | kDwarfRegRA ;
215+ p += WriteULEB128 (p, 1 );
216+
217+ // Pad CIE to pointer-size (8-byte) alignment.
218+ size_t cie_content_len = static_cast <size_t >(p - cie_content_start);
219+ size_t cie_padded_len = xe::round_up (cie_content_len, sizeof (void *));
220+ while (p < cie_content_start + cie_padded_len) {
221+ *p++ = kDW_CFA_nop ;
222+ }
223+
224+ // Write CIE length (excludes the length field itself).
225+ *reinterpret_cast <uint32_t *>(cie_length_ptr) =
226+ static_cast <uint32_t >(p - cie_content_start);
227+
228+ // === FDE (Frame Description Entry) ===
229+ uint8_t * fde_length_ptr = p;
230+ p += 4 ; // placeholder for length
231+
232+ uint8_t * fde_content_start = p;
233+
234+ // CIE pointer: offset from this field back to the start of the CIE.
235+ *reinterpret_cast <uint32_t *>(p) = static_cast <uint32_t >(p - cie_start);
236+ p += 4 ;
237+
238+ // PC begin: pc-relative offset to the start of the function code.
239+ // Computed relative to the execute-side address of this field.
240+ uint8_t * pc_begin_execute_addr =
241+ unwind_execute_base + (p - unwind_entry_address);
242+ *reinterpret_cast <int32_t *>(p) =
243+ static_cast <int32_t >(reinterpret_cast <intptr_t >(code_execute_address) -
244+ reinterpret_cast <intptr_t >(pc_begin_execute_addr));
245+ p += 4 ;
246+
247+ // PC range: size of the function code.
248+ *reinterpret_cast <uint32_t *>(p) =
249+ static_cast <uint32_t >(func_info.code_size .total );
250+ p += 4 ;
251+
252+ // Augmentation data length = 0 (no LSDA pointer).
253+ p += WriteULEB128 (p, 0 );
254+
255+ // FDE instructions: describe how the stack frame changes during the prolog.
256+ if (func_info.stack_size > 0 ) {
257+ // Advance location to the instruction after the stack allocation.
258+ size_t alloc_offset = func_info.prolog_stack_alloc_offset ;
259+ assert_true (alloc_offset > 0 );
260+ if (alloc_offset < 64 ) {
261+ *p++ = 0x40 | static_cast <uint8_t >(alloc_offset);
262+ } else if (alloc_offset < 256 ) {
263+ *p++ = kDW_CFA_advance_loc1 ;
264+ *p++ = static_cast <uint8_t >(alloc_offset);
265+ } else {
266+ *p++ = kDW_CFA_advance_loc2 ;
267+ *reinterpret_cast <uint16_t *>(p) = static_cast <uint16_t >(alloc_offset);
268+ p += 2 ;
269+ }
270+
271+ // DW_CFA_def_cfa_offset: CFA = RSP + 8 + stack_size after stack alloc.
272+ *p++ = kDW_CFA_def_cfa_offset ;
273+ p += WriteULEB128 (p, 8 + func_info.stack_size );
274+
275+ // For thunk functions, encode callee-saved register save locations.
276+ // The thunk saves non-volatile registers at known offsets from RSP.
277+ if (func_info.stack_size == StackLayout::THUNK_STACK_SIZE) {
278+ size_t cfa = 8 + func_info.stack_size ; // 272
279+
280+ // RBX at rsp+0x18 → CFA-248, factored offset = 31
281+ *p++ = 0x80 | kDwarfRegRBX ;
282+ p += WriteULEB128 (p, (cfa - 0x18 ) / 8 );
283+
284+ // RBP at rsp+0x20 → CFA-240, factored offset = 30
285+ *p++ = 0x80 | kDwarfRegRBP ;
286+ p += WriteULEB128 (p, (cfa - 0x20 ) / 8 );
287+
288+ // R12 at rsp+0x40 → CFA-208, factored offset = 26
289+ *p++ = 0x80 | kDwarfRegR12 ;
290+ p += WriteULEB128 (p, (cfa - 0x40 ) / 8 );
291+
292+ // R13 at rsp+0x48 → CFA-200, factored offset = 25
293+ *p++ = 0x80 | kDwarfRegR13 ;
294+ p += WriteULEB128 (p, (cfa - 0x48 ) / 8 );
295+
296+ // R14 at rsp+0x50 → CFA-192, factored offset = 24
297+ *p++ = 0x80 | kDwarfRegR14 ;
298+ p += WriteULEB128 (p, (cfa - 0x50 ) / 8 );
299+
300+ // R15 at rsp+0x58 → CFA-184, factored offset = 23
301+ *p++ = 0x80 | kDwarfRegR15 ;
302+ p += WriteULEB128 (p, (cfa - 0x58 ) / 8 );
303+ }
304+ }
305+
306+ // Pad FDE to pointer-size (8-byte) alignment.
307+ size_t fde_content_len = static_cast <size_t >(p - fde_content_start);
308+ size_t fde_padded_len = xe::round_up (fde_content_len, sizeof (void *));
309+ while (p < fde_content_start + fde_padded_len) {
310+ *p++ = kDW_CFA_nop ;
311+ }
312+
313+ // Write FDE length.
314+ *reinterpret_cast <uint32_t *>(fde_length_ptr) =
315+ static_cast <uint32_t >(p - fde_content_start);
316+
317+ // === Terminator (zero-length entry) ===
318+ *reinterpret_cast <uint32_t *>(p) = 0 ;
319+ p += 4 ;
320+
321+ assert_true (static_cast <size_t >(p - unwind_entry_address) <=
322+ kMaxUnwindInfoSize );
323+ }
47324
48325} // namespace x64
49326} // namespace backend
0 commit comments