@@ -52,13 +52,14 @@ extern "C" void ghost_ret_trampoline();
5252// Logging (minimal, stderr only)
5353// ============================================================================
5454
55+ // #define DEBUG
5556#ifdef DEBUG
56- #define LOG_DEBUG (...) fprintf(stderr, " [GhostStack ] " __VA_ARGS__)
57+ #define LOG_DEBUG (...) do { fprintf (stderr, " [GS ] " __VA_ARGS__); fflush (stderr); } while ( 0 )
5758#else
5859#define LOG_DEBUG (...) ((void )0 )
5960#endif
6061
61- #define LOG_ERROR (...) fprintf(stderr, " [GhostStack][ERROR ] " __VA_ARGS__)
62+ #define LOG_ERROR (...) do { fprintf (stderr, " [GS][ERR ] " __VA_ARGS__); fflush (stderr); } while ( 0 )
6263
6364// ============================================================================
6465// Utilities
@@ -179,17 +180,6 @@ class GhostStackImpl {
179180
180181 /* *
181182 * Called by trampoline when a function returns.
182- *
183- * Uses epoch-based validation to detect if reset() was called during
184- * execution (e.g., from a signal handler). This prevents accessing
185- * stale or cleared entries.
186- *
187- * Implements longjmp detection by comparing the current stack pointer
188- * against the expected value. If they don't match, searches backward
189- * through the shadow stack to find the matching entry (like nwind does).
190- *
191- * @param sp Stack pointer at return time (for longjmp detection)
192- * @return Original return address to jump to
193183 */
194184 uintptr_t on_ret_trampoline (uintptr_t sp) {
195185 // Capture current epoch - if it changes, reset() was called
@@ -199,7 +189,8 @@ class GhostStackImpl {
199189 size_t tail = tail_.fetch_sub (1 , std::memory_order_acq_rel) - 1 ;
200190
201191 if (entries_.empty () || tail >= entries_.size ()) {
202- LOG_ERROR (" Stack corruption in trampoline!\n " );
192+ LOG_ERROR (" CORRUPTION! empty=%d tail=%zu sz=%zu\n " ,
193+ (int )entries_.empty (), tail, entries_.size ());
203194 std::abort ();
204195 }
205196
@@ -208,17 +199,10 @@ class GhostStackImpl {
208199 // Check for longjmp: if SP doesn't match expected, search backward
209200 // through shadow stack for matching entry (frames were skipped)
210201 if (sp != 0 && entry.stack_pointer != 0 && entry.stack_pointer != sp) {
211- LOG_DEBUG (" SP mismatch at index %zu: expected 0x%lx, got 0x%lx - checking for longjmp\n " ,
212- tail, entry.stack_pointer , sp);
213-
214202 // Search backward through shadow stack for matching SP (nwind style)
215203 // Only update tail_ if we find a match - don't corrupt it during search
216204 for (size_t i = tail; i > 0 ; --i) {
217205 if (entries_[i - 1 ].stack_pointer == sp) {
218- size_t skipped = tail - (i - 1 );
219- LOG_DEBUG (" longjmp detected: found matching SP at index %zu (skipped %zu frames)\n " ,
220- i - 1 , skipped);
221-
222206 // Update tail_ to skip all the frames that were bypassed by longjmp
223207 tail_.store (i - 1 , std::memory_order_release);
224208 tail = i - 1 ;
@@ -234,8 +218,7 @@ class GhostStackImpl {
234218 std::abort ();
235219 }
236220
237- uintptr_t ret_addr = entries_[tail].return_address ;
238- return ret_addr;
221+ return entries_[tail].return_address ;
239222 }
240223
241224private:
@@ -254,7 +237,6 @@ class GhostStackImpl {
254237 buffer[i] = reinterpret_cast <void *>(entries_[count - 1 - i].ip );
255238 }
256239
257- LOG_DEBUG (" Fast path: %zu frames\n " , count);
258240 return count;
259241 }
260242
@@ -278,9 +260,12 @@ class GhostStackImpl {
278260 unw_getcontext (&ctx);
279261 unw_init_local (&cursor, &ctx);
280262
263+ // Skip the current frame to avoid patching our own return address
264+ if (unw_step (&cursor) > 0 ) {
265+ // Skipped internal frame
266+ }
267+
281268 // Process frames: read current frame, then step to next
282- // Note: After skip loop, cursor is positioned AT the first frame we want
283- // We need to read first, then step (not step-then-read)
284269 size_t frame_idx = 0 ;
285270 int step_result;
286271 do {
@@ -309,17 +294,31 @@ class GhostStackImpl {
309294
310295 // Get location where return address is stored
311296 uintptr_t * ret_loc = nullptr ;
297+
298+ // Get actual SP (needed for ARM64 expected_sp calculation)
299+ unw_word_t actual_sp;
300+ unw_get_reg (&cursor, UNW_REG_SP, &actual_sp);
301+
312302#ifdef __linux__
313303 unw_save_loc_t loc;
314- if (unw_get_save_loc (&cursor, GS_RA_REGISTER, &loc) == 0 &&
315- loc.type == UNW_SLT_MEMORY) {
304+ int save_loc_ret = unw_get_save_loc (&cursor, GS_RA_REGISTER, &loc);
305+
306+ if (save_loc_ret == 0 && loc.type == UNW_SLT_MEMORY && loc.u .addr != 0 ) {
316307 ret_loc = reinterpret_cast <uintptr_t *>(loc.u .addr );
308+ // Sanity check: ret_loc should be somewhere near FP (which is our sp variable)
309+ uintptr_t addr = loc.u .addr ;
310+ if (addr < sp - 0x10000 || addr > sp + 0x10000 ) {
311+ ret_loc = nullptr ; // Don't use this suspicious address
312+ }
317313 }
318314#else
319315 // macOS: return address is at fp + sizeof(void*)
320316 ret_loc = reinterpret_cast <uintptr_t *>(sp + sizeof (void *));
321317#endif
322- if (!ret_loc) break ;
318+
319+ if (!ret_loc) {
320+ break ;
321+ }
323322
324323 uintptr_t ret_addr = *ret_loc;
325324
@@ -332,16 +331,22 @@ class GhostStackImpl {
332331 // Compare against stripped address since trampoline address doesn't have PAC
333332 if (stripped_ret_addr == reinterpret_cast <uintptr_t >(ghost_ret_trampoline)) {
334333 found_existing = true ;
335- LOG_DEBUG (" Found existing trampoline at frame %zu\n " , frame_idx);
336334 break ;
337335 }
338336
339337 // Store the stack pointer that the trampoline will pass.
340- // The trampoline passes RSP right after landing (before its stack manipulations).
341- // When RET executes, it pops the return address, so:
342- // RSP_trampoline = ret_loc + sizeof(void*)
343338 // This allows longjmp detection by comparing against the stored value.
339+ //
340+ // On x86_64: RET pops return address, so trampoline sees ret_loc + 8
341+ // On ARM64: RET doesn't touch SP. The trampoline receives the actual SP
342+ // at the moment of return (after the function's epilogue ran).
343+ // This is the value from UNW_REG_SP, not the FP (UNW_AARCH64_X29).
344+ #ifdef GS_ARCH_AARCH64
345+ uintptr_t expected_sp = actual_sp; // Actual SP at this frame
346+ #else
344347 uintptr_t expected_sp = reinterpret_cast <uintptr_t >(ret_loc) + sizeof (void *);
348+ #endif
349+
345350 // Store both IP (for returning to caller) and return_address (for trampoline restoration)
346351 // Insert at beginning to reverse order (oldest at index 0, newest at end)
347352 new_entries.insert (new_entries.begin (), {ip, ret_addr, ret_loc, expected_sp});
@@ -351,8 +356,10 @@ class GhostStackImpl {
351356 } while (step_result > 0 );
352357
353358 // Install trampolines on new entries
354- for (auto & e : new_entries) {
355- *e.location = reinterpret_cast <uintptr_t >(ghost_ret_trampoline);
359+ uintptr_t tramp_addr = reinterpret_cast <uintptr_t >(ghost_ret_trampoline);
360+ for (size_t i = 0 ; i < new_entries.size (); ++i) {
361+ auto & e = new_entries[i];
362+ *e.location = tramp_addr;
356363 }
357364
358365 // Merge with existing entries if we found a patched frame
@@ -375,7 +382,6 @@ class GhostStackImpl {
375382 buffer[i] = reinterpret_cast <void *>(entries_[count - 1 - i].ip );
376383 }
377384
378- LOG_DEBUG (" Captured %zu frames\n " , count);
379385 return count;
380386 }
381387
@@ -431,7 +437,6 @@ struct ThreadLocalInstance {
431437
432438 ~ThreadLocalInstance () {
433439 if (ptr) {
434- LOG_DEBUG (" Thread exit: resetting shadow stack\n " );
435440 ptr->reset ();
436441 delete ptr;
437442 ptr = nullptr ;
@@ -444,7 +449,6 @@ static thread_local ThreadLocalInstance t_instance;
444449static GhostStackImpl& get_instance () {
445450 if (!t_instance.ptr ) {
446451 t_instance.ptr = new GhostStackImpl ();
447- LOG_DEBUG (" Created new shadow stack instance for thread\n " );
448452 }
449453 return *t_instance.ptr ;
450454}
@@ -473,13 +477,11 @@ static void fork_child_handler() {
473477 if (t_instance.ptr ) {
474478 t_instance.ptr ->reset ();
475479 }
476- LOG_DEBUG (" Fork child handler: reset shadow stack\n " );
477480}
478481
479482static void register_atfork_handler () {
480483 std::call_once (g_atfork_flag, []() {
481484 pthread_atfork (nullptr , nullptr , fork_child_handler);
482- LOG_DEBUG (" Registered pthread_atfork handler\n " );
483485 });
484486}
485487
@@ -492,8 +494,6 @@ extern "C" {
492494void ghost_stack_init (ghost_stack_unwinder_t unwinder) {
493495 std::call_once (g_init_flag, [unwinder]() {
494496 g_custom_unwinder = unwinder;
495- LOG_DEBUG (" Initialized with %s unwinder\n " ,
496- unwinder ? " custom" : " default" );
497497 });
498498
499499 // Register fork handler (idempotent, safe to call multiple times)
@@ -542,8 +542,6 @@ uintptr_t ghost_trampoline_handler(uintptr_t sp) {
542542
543543// Called when exception passes through trampoline
544544uintptr_t ghost_exception_handler (void * exception) {
545- LOG_DEBUG (" Exception through trampoline\n " );
546-
547545 auto & impl = get_instance ();
548546 uintptr_t ret = impl.pop_entry (); // Direct pop, no longjmp check
549547 impl.reset ();
0 commit comments