Skip to content

Commit 0b8da1b

Browse files
authored
Merge pull request #609 from NotRequiem/dev
VM::TRAP stack unwinding issues caused by RBX clobbering
2 parents 26ac24a + 4e25276 commit 0b8da1b

File tree

2 files changed

+89
-52
lines changed

2 files changed

+89
-52
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ endif()
2727

2828
# compiler flags
2929
if (MSVC)
30-
set(CMAKE_CXX_FLAGS "/Wall /W4 /EHsc")
30+
set(CMAKE_CXX_FLAGS "/W4 /EHsc")
3131
else()
3232
# Linux and Apple
3333
set(CMAKE_CXX_FLAGS "-Wextra -Wall -Wextra -Wconversion -Wdouble-promotion -Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion")

src/vmaware.hpp

Lines changed: 88 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -9199,17 +9199,18 @@ struct VM {
91999199

92009200
/**
92019201
* @brief Check if after raising two traps at the same RIP, a hypervisor interferes with the instruction pointer delivery
9202-
* @category Windows, x86
9202+
* @category Windows, x86_64
92039203
* @implements VM::TRAP
92049204
*/
92059205
[[nodiscard]] static bool trap() {
92069206
bool hypervisorCaught = false;
9207-
#if (x86)
9208-
// when a single-step (TF) and hardware breakpoint (DR0) collide, Intel CPUs set both DR6.BS and DR6.B0 to report both events, which help make this detection trick
9207+
#if (x86_64)
9208+
// when a single-step (TF) and hardware breakpoint (DR0) collide, Intel CPUs set both DR6.BS and DR6.B0 to report both events, which helps make this detection trick work
92099209
// AMD CPUs prioritize the breakpoint, setting only its corresponding bit in DR6 and clearing the single-step bit, which is why this technique is not compatible with AMD
92109210
if (!cpu::is_intel()) {
92119211
return false;
92129212
}
9213+
92139214
// mobile SKUs can "false flag" this check
92149215
const char* brand = cpu::get_brand();
92159216
for (const char* c = brand; *c; ++c) {
@@ -9222,15 +9223,19 @@ struct VM {
92229223
}
92239224
}
92249225

9225-
// push flags, set TF-bit, pop flags, execute a dummy instruction, then return
9226+
// We must preserve RBX because CPUID clobbers it, and RBX is a non-volatile
9227+
// register in x64. If we don't restore it, the calling function (VM::check) crashes
9228+
// we use MOV R8, RBX instead of PUSH RBX. Pushing to the stack without
9229+
// unwind metadata breaks SEH in x64 (OS cannot find the handler), causing a crash
92269230
constexpr u8 trampoline[] = {
9227-
0x9C, // pushfq
9228-
0x81, 0x04, 0x24, // OR DWORD PTR [RSP], 0x10100
9231+
0x49, 0x89, 0xD8, // mov r8, rbx (save rbx to volatile register r8)
9232+
0x9C, // pushfq
9233+
0x81, 0x04, 0x24, // OR DWORD PTR [RSP], 0x10100 (Set TF)
92299234
0x00, 0x01, 0x01, 0x00,
9230-
0x9D, // popfq
9231-
0x0F, 0xA2, // cpuid (or any other trappable instruction, but this one is ok since it has to be trapped in every x86 hv)
9232-
0x90, 0x90, 0x90, // NOPs to pad to breakpoint offset
9233-
0xC3 // ret
9235+
0x9D, // popfq
9236+
0x0F, 0xA2, // cpuid
9237+
0x4C, 0x89, 0xC3, // mov rbx, r8 (restore rbx from r8) - trap happens here
9238+
0xC3 // ret
92349239
};
92359240
SIZE_T trampSize = sizeof(trampoline);
92369241

@@ -9257,13 +9262,14 @@ struct VM {
92579262
using NtGetContextThread_t = NTSTATUS(__stdcall*)(HANDLE, PCONTEXT);
92589263
using NtSetContextThread_t = NTSTATUS(__stdcall*)(HANDLE, PCONTEXT);
92599264

9260-
const auto pNtAllocateVirtualMemory = reinterpret_cast<NtAllocateVirtualMemory_t>(funcs[0]);
9261-
const auto pNtProtectVirtualMemory = reinterpret_cast<NtProtectVirtualMemory_t>(funcs[1]);
9262-
const auto pNtFreeVirtualMemory = reinterpret_cast<NtFreeVirtualMemory_t>(funcs[2]);
9263-
const auto pNtFlushInstructionCache = reinterpret_cast<NtFlushInstructionCache_t>(funcs[3]);
9264-
const auto pNtClose = reinterpret_cast<NtClose_t>(funcs[4]);
9265-
const auto pNtGetContextThread = reinterpret_cast<NtGetContextThread_t>(funcs[5]);
9266-
const auto pNtSetContextThread = reinterpret_cast<NtSetContextThread_t>(funcs[6]);
9265+
// volatile ensures these are loaded from the stack after SEH unwind when compiled with aggressive optimizations
9266+
NtAllocateVirtualMemory_t volatile pNtAllocateVirtualMemory = reinterpret_cast<NtAllocateVirtualMemory_t>(funcs[0]);
9267+
NtProtectVirtualMemory_t volatile pNtProtectVirtualMemory = reinterpret_cast<NtProtectVirtualMemory_t>(funcs[1]);
9268+
NtFreeVirtualMemory_t volatile pNtFreeVirtualMemory = reinterpret_cast<NtFreeVirtualMemory_t>(funcs[2]);
9269+
NtFlushInstructionCache_t volatile pNtFlushInstructionCache = reinterpret_cast<NtFlushInstructionCache_t>(funcs[3]);
9270+
NtClose_t volatile pNtClose = reinterpret_cast<NtClose_t>(funcs[4]);
9271+
NtGetContextThread_t volatile pNtGetContextThread = reinterpret_cast<NtGetContextThread_t>(funcs[5]);
9272+
NtSetContextThread_t volatile pNtSetContextThread = reinterpret_cast<NtSetContextThread_t>(funcs[6]);
92679273

92689274
if (!pNtAllocateVirtualMemory || !pNtProtectVirtualMemory || !pNtFlushInstructionCache ||
92699275
!pNtFreeVirtualMemory || !pNtGetContextThread || !pNtSetContextThread || !pNtClose) {
@@ -9285,7 +9291,8 @@ struct VM {
92859291
ULONG oldProt = 0;
92869292
st = pNtProtectVirtualMemory(hCurrentProcess, &tmpBase, &tmpSz, PAGE_EXECUTE_READ, &oldProt);
92879293
if (!NT_SUCCESS(st)) {
9288-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9294+
PVOID freeBase = execMem;
9295+
SIZE_T freeSize = trampSize;
92899296
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
92909297
return false;
92919298
}
@@ -9300,66 +9307,96 @@ struct VM {
93009307
const HANDLE hCurrentThread = reinterpret_cast<HANDLE>(-2LL);
93019308

93029309
if (!NT_SUCCESS(pNtGetContextThread(hCurrentThread, &origCtx))) {
9303-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9310+
PVOID freeBase = execMem;
9311+
SIZE_T freeSize = trampSize;
93049312
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
93059313
return false;
93069314
}
93079315

9308-
// set Dr0 to trampoline+offset (step triggers here)
9316+
// Set DR0 to trampoline + 14 (Instruction: mov rbx, r8)
9317+
// Offset calculation: mov_r8_rbx(3) + pushfq(1) + or(7) + popfq(1) + cpuid(2) = 14
9318+
// This is where single step traps after CPUID, and where we want the collision
9319+
const uintptr_t expectedTrapAddr = reinterpret_cast<uintptr_t>(execMem) + 14;
9320+
9321+
// set Dr0 to trampoline+offset
93099322
CONTEXT dbgCtx = origCtx;
9310-
const uintptr_t baseAddr = reinterpret_cast<uintptr_t>(execMem);
9311-
dbgCtx.Dr0 = baseAddr + 11; // single step breakpoint address
9312-
dbgCtx.Dr7 = 1; // enable local breakpoint 0
9323+
dbgCtx.Dr0 = expectedTrapAddr; // single step breakpoint address
9324+
dbgCtx.Dr7 = 1; // enable Local Breakpoint 0
93139325

93149326
if (!NT_SUCCESS(pNtSetContextThread(hCurrentThread, &dbgCtx))) {
93159327
pNtSetContextThread(hCurrentThread, &origCtx);
9316-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9328+
PVOID freeBase = execMem;
9329+
SIZE_T freeSize = trampSize;
93179330
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
93189331
return false;
93199332
}
93209333

9321-
auto vetExceptions = [&](u32 code, EXCEPTION_POINTERS* info) noexcept -> u8 {
9322-
// if not single-step, hypervisor likely swatted our trap
9323-
if (code != static_cast<DWORD>(0x80000004L)) {
9324-
hypervisorCaught = true;
9325-
return EXCEPTION_CONTINUE_SEARCH;
9326-
}
9334+
// Context structure to pass data to the static SEH handler
9335+
struct TrapContext {
9336+
uintptr_t expectedTrapAddr;
9337+
u8* hitCount;
9338+
bool* hypervisorCaught;
9339+
};
93279340

9328-
// count breakpoint hits
9329-
hitCount++;
9341+
// Static class for SEH filtering to avoid Release mode Lambda corruption
9342+
struct SEH_Trap {
9343+
static LONG Vet(u32 code, EXCEPTION_POINTERS* info, TrapContext* ctx) noexcept {
9344+
// Lambda returns LONG to support EXCEPTION_CONTINUE_EXECUTION
9345+
if (code != static_cast<DWORD>(0x80000004L)) {
9346+
return EXCEPTION_CONTINUE_SEARCH;
9347+
}
93309348

9331-
// validate exception address matches our breakpoint location
9332-
if (reinterpret_cast<uintptr_t>(info->ExceptionRecord->ExceptionAddress) != baseAddr + 11) {
9333-
hypervisorCaught = true;
9334-
return EXCEPTION_EXECUTE_HANDLER;
9335-
}
9349+
// Verify exception happened at our calculated offset
9350+
if (reinterpret_cast<uintptr_t>(info->ExceptionRecord->ExceptionAddress) != ctx->expectedTrapAddr) {
9351+
info->ContextRecord->EFlags &= ~0x100; // Clear TF
9352+
info->ContextRecord->Dr7 &= ~1; // Clear DR0 Enable
9353+
*ctx->hypervisorCaught = true;
9354+
return EXCEPTION_CONTINUE_EXECUTION;
9355+
}
93369356

9337-
// check if Trap Flag and DR0 contributed
9338-
constexpr u64 required_bits = (1ULL << 14) | 1ULL;
9339-
const u64 status = info->ContextRecord->Dr6;
9357+
(*ctx->hitCount)++;
93409358

9341-
if ((status & required_bits) != required_bits) {
9342-
if (util::hyper_x() != HYPERV_ARTIFACT_VM)
9343-
hypervisorCaught = true; // detects type 1 Hyper-V too, which we consider legitimate
9359+
// check if Trap Flag and DR0 contributed
9360+
constexpr u64 required_bits = (1ULL << 14) | 1ULL; // BS | B0
9361+
const u64 status = info->ContextRecord->Dr6;
9362+
9363+
if ((status & required_bits) != required_bits) {
9364+
if (util::hyper_x() != HYPERV_ARTIFACT_VM) // detects type 1 Hyper-V too, which we consider legitimate
9365+
*ctx->hypervisorCaught = true;
9366+
}
9367+
9368+
// Clear Trap Flag to stop single stepping
9369+
info->ContextRecord->EFlags &= ~0x100;
9370+
9371+
// Clear DR7 Local Enable 0 to disable the hardware breakpoint
9372+
// If we don't do this, the next instruction will trigger the breakpoint again immediately
9373+
info->ContextRecord->Dr7 &= ~1;
9374+
9375+
// executes mov rbx, r8 (restore), and returns
9376+
return EXCEPTION_CONTINUE_EXECUTION;
93449377
}
9345-
return EXCEPTION_EXECUTE_HANDLER;
93469378
};
93479379

9380+
TrapContext ctx = { expectedTrapAddr, &hitCount, &hypervisorCaught };
9381+
93489382
__try {
93499383
reinterpret_cast<void(*)()>(execMem)();
93509384
}
9351-
__except (vetExceptions(_exception_code(), reinterpret_cast<EXCEPTION_POINTERS*>(_exception_info()))) {
9352-
// if we didn't hit exactly once, assume hypervisor interference
9353-
if (hitCount != 1) {
9354-
hypervisorCaught = true;
9355-
}
9385+
__except (SEH_Trap::Vet(_exception_code(), reinterpret_cast<EXCEPTION_POINTERS*>(_exception_info()), &ctx)) {
9386+
// This block is effectively unreachable because SEH_Trap::Vet returns CONTINUE_EXECUTION or CONTINUE_SEARCH
9387+
}
9388+
9389+
// If the hypervisor swallowed the exception entirely, hitCount will be 0
9390+
if (hitCount != 1) {
9391+
hypervisorCaught = true;
93569392
}
93579393

93589394
pNtSetContextThread(hCurrentThread, &origCtx);
93599395

9360-
PVOID freeBase = execMem; SIZE_T freeSize = trampSize;
9396+
PVOID freeBase = execMem;
9397+
SIZE_T freeSize = trampSize;
93619398
pNtFreeVirtualMemory(hCurrentProcess, &freeBase, &freeSize, MEM_RELEASE);
9362-
#endif
9399+
#endif
93639400
return hypervisorCaught;
93649401
}
93659402

0 commit comments

Comments
 (0)