Skip to content

Commit 74cc792

Browse files
committed
Add support for executable remappings and blanket executable heap
1 parent 8658a0e commit 74cc792

File tree

6 files changed

+114
-55
lines changed

6 files changed

+114
-55
lines changed

lib/tinykvm/amd64/paging.cpp

Lines changed: 76 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,72 @@ inline bool is_flagged_page(uint64_t flags, uint64_t entry) {
5656
return (entry & flags) == flags;
5757
}
5858

59+
/**
 * Install a single virtual remapping into the guest page tables using
 * 2MB page directory entries.
 *
 * @param memory     Guest memory whose page tables are modified. Its
 *                   vmem_exec_begin/vmem_exec_end range is set when this
 *                   is the first executable remapping seen.
 * @param remapping  The remapping to install. When remapping.phys is 0x0
 *                   the backing physical range is taken from the machine's
 *                   mmap allocator, aligned up to 2MB.
 * @param pml4       The top-level page table to install the remapping in.
 * @param flags      Flags OR'ed into every 2MB entry, in addition to
 *                   PDE64_PRESENT and PDE64_PS.
 * @param free_page  Address of the next unused page-table page. Advanced
 *                   by 0x1000 for each table allocated here.
 *
 * @throws MachineException when remapping.virt is not above free_page,
 *         when remapping.size is not a multiple of the page size, or when
 *         remapping.size needs more than the 512 entries (1GB) that a
 *         single page directory provides.
 */
static void add_remappings(vMemory& memory,
	const VirtualRemapping& remapping,
	uint64_t* pml4,
	uint64_t flags,
	uint64_t& free_page)
{
	if (remapping.virt <= free_page)
		throw MachineException("Invalid remapping address", remapping.virt);
	if (remapping.size % vMemory::PageSize() != 0)
		throw MachineException("Invalid remapping size", remapping.size);

	// A page directory holds 512 entries of 2MB each. The entry count used
	// to be masked with 511, which turned an exactly-1GB remapping into
	// zero entries and silently wrapped anything larger. Reject oversized
	// requests up front, before anything has been allocated.
	constexpr uint64_t PD_ENTRIES = 512;
	const uint64_t n_2mb_pages = remapping.size >> 21UL;
	if (n_2mb_pages > PD_ENTRIES)
		throw MachineException("Invalid remapping size", remapping.size);
	// NOTE(review): a size that is not a multiple of 2MB is truncated by
	// the shift above, leaving the tail unmapped. Confirm whether callers
	// are expected to pass 2MB-granular sizes.

	const auto virt_tera_page = (remapping.virt >> 39UL) & 511;
	const auto virt_giga_page = (remapping.virt >> 30UL) & 511;

	uint64_t paddr_base = remapping.phys;
	if (paddr_base == 0x0) {
		constexpr auto PD_ALIGN_MASK = (1ULL << 21U) - 1;
		// Over-allocate rounding up to nearest 2MB
		paddr_base = memory.machine.mmap_allocate(remapping.size + PD_ALIGN_MASK);
		paddr_base = (paddr_base + PD_ALIGN_MASK) & ~PD_ALIGN_MASK;
		// Relax allocation down to size
		memory.machine.mmap() = paddr_base + remapping.size;
	}

	// Create the PDPT for this 512GB slot if the PML4 entry is empty
	if (pml4[virt_tera_page] == 0) {
		const auto pdpt_addr = free_page;
		free_page += 0x1000;

		pml4[virt_tera_page] = PDE64_PRESENT | PDE64_USER | PDE64_RW | pdpt_addr;
	}

	auto pdpt_addr = pml4[virt_tera_page] & PDE64_ADDR_MASK;
	auto* pdpt = memory.page_at(pdpt_addr);

	// Allocate the gigapage with 512x 2MB entries
	if (pdpt[virt_giga_page] == 0) {
		const auto giga_page = free_page;
		free_page += 0x1000;
		pdpt[virt_giga_page] = PDE64_PRESENT | PDE64_USER | PDE64_RW | giga_page;
	}

	auto pd_addr = pdpt[virt_giga_page] & PDE64_ADDR_MASK;
	auto* pd = memory.page_at(pd_addr);

	// Create 2MB entries for remapping size. Every entry past the
	// remapping is cleared, so the whole page directory is rewritten.
	// NOTE(review): entries are filled from pd[0], i.e. from the start of
	// the 1GB region containing remapping.virt. This appears to assume
	// that virt is 1GB-aligned and that no other remapping shares the
	// same gigapage -- confirm against callers.
	for (uint64_t i = 0; i < PD_ENTRIES; i++)
	{
		const auto paddr = paddr_base + (i << 21UL);
		if (i < n_2mb_pages)
			pd[i] = PDE64_PRESENT | flags | PDE64_PS | paddr;
		else
			pd[i] = 0;
	}

	// Track the first seen executable mapping, allowing mmap to use it for
	// JIT segments.
	if (remapping.executable && memory.vmem_exec_begin == 0)
	{
		memory.vmem_exec_begin = remapping.virt;
		memory.vmem_exec_end = remapping.virt + remapping.size;
	}
}
121+
59122
uint64_t setup_amd64_paging(vMemory& memory,
60-
std::string_view binary, const std::vector<VirtualRemapping>& remappings)
123+
std::string_view binary,
124+
const std::vector<VirtualRemapping>& remappings)
61125
{
62126
static constexpr uint64_t PD_MASK = (1ULL << 30) - 1;
63127
const size_t PD_PAGES = (memory.size + PD_MASK) >> 30;
@@ -136,10 +200,16 @@ uint64_t setup_amd64_paging(vMemory& memory,
136200
}
137201

138202
// Covers 1GB pages with 512x 2MB user-read-write entries
203+
// NOTE: Even with executable heap, the ELF loader will still correctly
204+
// apply the NX-bit to its own segments.
205+
uint64_t heap_flags = PDE64_USER | PDE64_RW;
206+
if (!memory.executable_heap)
207+
heap_flags |= PDE64_NX;
139208
for (uint64_t i = base_2mb_page+2; i < 512*PD_PAGES; i++) {
140-
pd[i] = PDE64_PRESENT | PDE64_PS | PDE64_USER | PDE64_RW | PDE64_NX
209+
pd[i] = PDE64_PRESENT | PDE64_PS | heap_flags
141210
| ((base_giga_page << 30) + (i << 21));
142211
}
212+
printf("Heap is executable: %d\n", memory.executable_heap);
143213

144214
/* ELF executable area */
145215
if (!binary.empty())
@@ -234,53 +304,10 @@ uint64_t setup_amd64_paging(vMemory& memory,
234304
/* Virtual memory remappings (up to 1GB each, for now) */
235305
for (const auto& vmem : remappings)
236306
{
237-
if (vmem.virt <= free_page)
238-
throw MachineException("Invalid remapping address", vmem.virt);
239-
if (vmem.size % vMemory::PageSize() != 0)
240-
throw MachineException("Invalid remapping size", vmem.size);
241-
const auto virt_tera_page = (vmem.virt >> 39UL) & 511;
242-
const auto virt_giga_page = (vmem.virt >> 30UL) & 511;
243-
244-
uint64_t paddr_base = vmem.phys;
245-
if (paddr_base == 0x0) {
246-
constexpr auto PD_ALIGN_MASK = (1ULL << 21U) - 1;
247-
// Over-allocate rounding up to nearest 2MB
248-
paddr_base = memory.machine.mmap_allocate(vmem.size + PD_ALIGN_MASK);
249-
paddr_base = (paddr_base + PD_ALIGN_MASK) & ~PD_ALIGN_MASK;
250-
// Relax allocation down to size
251-
memory.machine.mmap() = paddr_base + vmem.size;
252-
}
253-
254-
if (pml4[virt_tera_page] == 0) {
255-
const auto pdpt_addr = free_page;
256-
free_page += 0x1000;
257-
258-
pml4[virt_tera_page] = PDE64_PRESENT | PDE64_USER | PDE64_RW | pdpt_addr;
259-
}
260-
261-
auto pdpt_addr = pml4[virt_tera_page] & PDE64_ADDR_MASK;
262-
auto* pdpt = memory.page_at(pdpt_addr);
263-
264-
// Allocate the gigapage with 512x 2MB entries
265-
if (pdpt[virt_giga_page] == 0) {
266-
const auto giga_page = free_page;
267-
free_page += 0x1000;
268-
pdpt[virt_giga_page] = PDE64_PRESENT | PDE64_USER | PDE64_RW | giga_page;
269-
}
270-
271-
auto pd_addr = pdpt[virt_giga_page] & PDE64_ADDR_MASK;
272-
auto* pd = memory.page_at(pd_addr);
273-
274-
// Create 2MB entries for remapping size
275-
const auto n_2mb_pages = (vmem.size >> 21UL) & 511;
276-
for (uint64_t i = 0; i < 512; i++)
277-
{
278-
const auto paddr = paddr_base + (i << 21UL);
279-
if (i < n_2mb_pages)
280-
pd[i] = PDE64_PRESENT | PDE64_USER | PDE64_RW | PDE64_NX | PDE64_PS | paddr;
281-
else
282-
pd[i] = 0;
283-
}
307+
uint64_t flags = PDE64_USER | PDE64_NX;
308+
if (vmem.writable) flags |= PDE64_RW;
309+
if (vmem.executable) flags &= ~PDE64_NX;
310+
add_remappings(memory, vmem, pml4, flags, free_page);
284311
}
285312

286313
// vDSO / vsyscall

lib/tinykvm/common.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ namespace tinykvm
2525
uint64_t phys;
2626
uint64_t virt;
2727
size_t size;
28+
bool writable = false;
29+
bool executable = false;
2830
};
2931

3032
struct MachineOptions {
@@ -51,6 +53,8 @@ namespace tinykvm
5153
bool allow_reset_to_new_master = false;
5254
/* Allow fixed addresses with mmap(). */
5355
bool allow_fixed_mmap = false;
56+
/* Make heap executable, to support JIT. */
57+
bool executable_heap = false;
5458
};
5559

5660
class MachineException : public std::exception {

lib/tinykvm/machine_utils.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include "machine.hpp"
22

33
#include <cstring>
4-
#define USERMODE_FLAGS (0x7 | 1UL << 63) /* USER, READ/WRITE, PRESENT, NX */
54

65
namespace tinykvm {
76
static constexpr uint64_t PageMask() {
@@ -16,7 +15,7 @@ void Machine::memzero(address_t addr, size_t len)
1615
{
1716
const size_t offset = addr & PageMask();
1817
const size_t size = std::min(vMemory::PageSize() - offset, len);
19-
auto* page = memory.get_writable_page(addr & ~PageMask(), USERMODE_FLAGS, true);
18+
auto* page = memory.get_writable_page(addr & ~PageMask(), memory.expectedUsermodeFlags(), true);
2019
std::memset(&page[offset], 0, size);
2120

2221
addr += size;
@@ -38,7 +37,7 @@ void Machine::copy_to_guest(address_t addr, const void* vsrc, size_t len, bool z
3837
{
3938
const size_t offset = addr & PageMask();
4039
const size_t size = std::min(vMemory::PageSize() - offset, len);
41-
auto* page = memory.get_writable_page(addr & ~PageMask(), USERMODE_FLAGS, zeroes);
40+
auto* page = memory.get_writable_page(addr & ~PageMask(), memory.expectedUsermodeFlags(), zeroes);
4241
std::copy(src, src + size, &page[offset]);
4342

4443
addr += size;
@@ -135,7 +134,7 @@ size_t Machine::writable_buffers_from_range(
135134
{
136135
const size_t offset = addr & PageMask();
137136
const size_t size = std::min(vMemory::PageSize() - offset, len);
138-
auto *page = memory.get_writable_page(addr & ~PageMask(), USERMODE_FLAGS, false);
137+
auto *page = memory.get_writable_page(addr & ~PageMask(), memory.expectedUsermodeFlags(), false);
139138

140139
auto* ptr = (char*) &page[offset];
141140
if (last && ptr == last->ptr + last->len) {
@@ -173,7 +172,7 @@ void Machine::copy_from_machine(address_t addr, Machine& src, address_t sa, size
173172
const size_t offset = addr & PageMask();
174173
const size_t size = std::min(vMemory::PageSize() - offset, buf.len);
175174
/* NOTE: We could use zeroes if remaining is >= PageSize() */
176-
auto *page = memory.get_writable_page(addr & ~PageMask(), USERMODE_FLAGS, false);
175+
auto *page = memory.get_writable_page(addr & ~PageMask(), memory.expectedUsermodeFlags(), false);
177176
std::copy(buf.ptr, buf.ptr + size, &page[offset]);
178177

179178
if (size == buf.len) {

lib/tinykvm/memory.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ vMemory::vMemory(Machine& m, const MachineOptions& options,
2626
ptr(p), size(overaligned_memsize(s)), owned(own),
2727
main_memory_writes(options.master_direct_memory_writes),
2828
split_hugepages(options.split_hugepages),
29+
executable_heap(options.executable_heap),
2930
banks(m, options)
3031
{
3132
// Main memory is not always starting at 0x0
@@ -35,6 +36,7 @@ vMemory::vMemory(Machine& m, const MachineOptions& options,
3536
vMemory::vMemory(Machine& m, const MachineOptions& options, const vMemory& other)
3637
: vMemory{m, options, other.physbase, other.safebase, other.ptr, other.size, false}
3738
{
39+
this->executable_heap = other.executable_heap;
3840
}
3941
vMemory::~vMemory()
4042
{
@@ -125,7 +127,7 @@ char* vMemory::safely_at(uint64_t addr, size_t asize)
125127
const auto offset = addr & PageMask();
126128
if (offset + asize <= vMemory::PageSize())
127129
{
128-
auto* page = this->get_writable_page(pagebase, USERMODE_FLAGS, false);
130+
auto* page = this->get_writable_page(pagebase, expectedUsermodeFlags(), false);
129131
return &page[offset];
130132
}
131133

@@ -296,4 +298,13 @@ void vMemory::memory_exception(const char* msg, uint64_t addr, uint64_t size)
296298
throw MemoryException(msg, addr, size);
297299
}
298300

301+
302+
uint64_t vMemory::expectedUsermodeFlags() const noexcept
303+
{
304+
uint64_t flags = PDE64_PRESENT | PDE64_USER | PDE64_RW;
305+
if (!this->executable_heap)
306+
flags |= PDE64_NX;
307+
return flags;
299308
}
309+
310+
} // namespace tinykvm

lib/tinykvm/memory.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ struct vMemory {
1919
uint64_t physbase;
2020
uint64_t safebase;
2121
uint64_t page_tables;
22+
/* Optional executable memory range */
23+
uint64_t vmem_exec_begin = 0;
24+
uint64_t vmem_exec_end = 0;
2225
/* Linear memory */
2326
char* ptr;
2427
size_t size;
@@ -28,6 +31,8 @@ struct vMemory {
2831
bool main_memory_writes = false;
2932
/* Split into small pages (4K) when reaching a leaf hugepage. */
3033
bool split_hugepages = true;
34+
/* Executable heap */
35+
bool executable_heap = false;
3136
/* Dynamic page memory */
3237
MemoryBanks banks; // fault-in memory banks
3338
/* SMP mutex */
@@ -67,6 +72,8 @@ struct vMemory {
6772
again in order to support itself. It has already been made forkable. */
6873
bool is_forkable_master() const noexcept;
6974

75+
uint64_t expectedUsermodeFlags() const noexcept;
76+
7077
/* Create new identity-mapped memory regions */
7178
vMemory(Machine&, const MachineOptions&, uint64_t, uint64_t, char*, size_t, bool = true);
7279
/* Loan memory from another machine */

src/functions.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,15 +118,26 @@ void setup_kvm_system_calls()
118118
Machine::install_syscall_handler(
119119
9, [] (auto& cpu) { // MMAP
120120
auto& regs = cpu.registers();
121+
const auto flags = regs.r10;
121122
if (UNLIKELY(regs.rdi % vMemory::PageSize() != 0 || regs.rsi == 0)) {
122123
// Size not matching a 4K page size
123124
regs.rax = ~0LL; /* MAP_FAILED */
124125
} else if (UNLIKELY(int(regs.r8) >= 0)) {
125126
// mmap to file fd (*NOT* supported)
126127
regs.rax = ~0LL; /* MAP_FAILED */
128+
} else if ((flags & 0x4) != 0) {
129+
// Executable mappings are supported if there is an execute-range in vMemory
130+
auto& memory = cpu.machine().main_memory();
131+
if (memory.vmem_exec_begin != 0x0) {
132+
regs.rax = memory.vmem_exec_begin;
133+
memory.vmem_exec_begin += regs.rsi;
134+
} else {
135+
regs.rax = ~0LL; /* MAP_FAILED */
136+
}
127137
} else if (regs.rdi != 0x0 && cpu.machine().allow_fixed_mmap()) {
128138
regs.rax = regs.rdi;
129139
} else if (regs.rdi != 0x0 && regs.rdi >= cpu.machine().heap_address() && regs.rdi < cpu.machine().mmap_start()) {
140+
// Existing range already mmap'ed
130141
regs.rax = regs.rdi;
131142
} else {
132143
// Round up to nearest power-of-two

0 commit comments

Comments
 (0)