Skip to content
This repository was archived by the owner on Jun 9, 2020. It is now read-only.

Commit a3f9a49

Browse files
committed
add vsyscall emulation in page fault handler
If we hit a page fault (PF) for an address in the vsyscall page, allocate some memory in the process address space that contains `syscall; retq`, and set `RAX` to the desired syscall number depending on the offset into the vsyscall page. This does not implement a true fast path for syscalls, nor does it implement the vDSO.
1 parent b61c85c commit a3f9a49

File tree

1 file changed

+85
-2
lines changed

1 file changed

+85
-2
lines changed

src/main.c

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <sys/sysctl.h>
2323

2424
#include <mach-o/dyld.h>
25+
#include <sys/mman.h>
2526

2627
static int
2728
get_cpuid_count (unsigned int leaf,
@@ -79,6 +80,85 @@ handle_syscall(void)
7980
return 0;
8081
}
8182

83+
#define VSYSCALL_PAGE_ADDR 0xffffffffff600000
84+
85+
static inline bool
86+
is_vsyscall(gaddr_t gladdr)
87+
{
88+
if (gladdr < VSYSCALL_PAGE_ADDR || gladdr > VSYSCALL_PAGE_ADDR + 0x1000) {
89+
//printk("Page Fault is not for vsyscall: %llx\n", gladdr);
90+
return false;
91+
}
92+
return true;
93+
}
94+
95+
/* vsyscall (and its latter day replacement vDSO) is a way to implement fast
96+
* paths for frequently called syscalls like `gettimeofday` and `time` without
97+
* generating the overhead of a context switch into the kernel.
98+
*
99+
 * Darwin/XNU has similar functionality in the form of the COMMPAGE:
100+
* https://wiki.darlinghq.org/documentation:commpage
101+
*
102+
* Currently, instead of providing a fast path, we rely on vsyscall emulation
103+
* by executing the syscall in the way all syscalls are currently implemented.
104+
* This is similar to what the Linux kernel does as well:
105+
* https://github.com/torvalds/linux/blob/v4.20/arch/x86/entry/vsyscall/vsyscall_emu_64.S
106+
*/
107+
108+
/* Guest address of the `syscall; retq` stub used for vsyscall emulation.
 * Stays 0 until the first recognized vsyscall fault maps the stub. */
static gaddr_t vsyscall_page = 0;

/*
 * Emulate a call into the legacy vsyscall page.
 *
 * gladdr is the faulting guest linear address. When it matches one of the
 * three hardcoded x86_64 vsyscall entry points, RAX is loaded with the
 * corresponding syscall number and RIP is pointed at a small stub in the
 * process' address space holding `syscall; retq`, which is where the CPU
 * ends up upon resumption.
 *
 * Returns true when the fault was consumed by the emulation, false when the
 * address is outside the vsyscall page or is not a known entry point (the
 * caller then handles it as a regular page fault).
 */
static inline bool
handle_vsyscall(gaddr_t gladdr)
{
  if (!is_vsyscall(gladdr)) {
    return false;
  }

  // These are the hardcoded offsets on x86_64; there is no reason to be more
  // clever than this given it is likely to be our only emulation target.
  // Decode the entry point first so that no guest memory is mapped for a
  // fault we are not going to handle.
  int sysnum = 0;
  switch (gladdr) {
  case VSYSCALL_PAGE_ADDR:
    sysnum = 96; /* gettimeofday */
    break;
  case VSYSCALL_PAGE_ADDR + 0x400:
    sysnum = 201; /* time */
    break;
  case VSYSCALL_PAGE_ADDR + 0x800:
    sysnum = 309; /* getcpu */
    break;
  default:
    printk("page fault for vsyscall -- 0x%llx\n", gladdr);
    return false;
  }

  // Define a location in the process' address space to execute the syscall.
  if (vsyscall_page == 0) {
    // Raw opcodes for `syscall; retq`. Kept as unsigned bytes because 0xC3
    // does not fit in a signed char.
    unsigned char stub[3] = {0x0f, 0x05, 0xC3};

    // NOTE(review): the results of do_mmap and copy_to_user are not checked;
    // their failure conventions are not visible here -- confirm and add
    // handling so a failed mapping is never installed as RIP.
    vsyscall_page = do_mmap(0, sizeof(stub), PROT_WRITE | PROT_READ,
                            LINUX_PROT_READ | LINUX_PROT_EXEC,
                            LINUX_MAP_ANONYMOUS | LINUX_MAP_PRIVATE, -1, 0);

    printk("allocated %llx for vsyscall_page\n", vsyscall_page);

    copy_to_user(vsyscall_page, stub, sizeof(stub));
  }

  vmm_write_register(HV_X86_RAX, sysnum);
  // Set RIP to our vsyscall emulation, where the CPU will end up upon
  // resumption.
  vmm_write_register(HV_X86_RIP, vsyscall_page);

  return true;
}
161+
82162
int
83163
task_run()
84164
{
@@ -243,8 +323,11 @@ main_loop(int return_on_sigret)
243323
/* FIXME */
244324
uint64_t gladdr;
245325
vmm_read_vmcs(VMCS_RO_EXIT_QUALIFIC, &gladdr);
246-
printk("page fault: caused by guest linear address 0x%llx\n", gladdr);
247-
send_signal(getpid(), LINUX_SIGSEGV);
326+
if (!handle_vsyscall(gladdr)) {
327+
printk("page fault: caused by guest linear address 0x%llx\n", gladdr);
328+
send_signal(getpid(), LINUX_SIGSEGV);
329+
}
330+
break;
248331
}
249332
case X86_VEC_UD: {
250333
uint64_t instlen, rip;

0 commit comments

Comments
 (0)