Skip to content

Commit 0f0265d

Browse files
authored
Merge pull request #25 from varnish/constrained_direct_memory_writes
Implement constrained direct memory writes
2 parents f0367e6 + 63cbbfc commit 0f0265d

File tree

8 files changed

+113
-44
lines changed

8 files changed

+113
-44
lines changed

lib/tinykvm/amd64/paging.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -572,8 +572,12 @@ char * writable_page_at(vMemory& memory, uint64_t addr, uint64_t verify_flags, b
572572
auto* pdpt = memory.page_at(pdpt_mem);
573573
/* Make copy of page if needed */
574574
if (is_copy_on_write(pml4[i])) {
575-
clone_and_update_entry(memory, pml4[i], pdpt, PDE64_RW);
576-
CLPRINT("-> Cloning a PML4 entry %lu: 0x%lX at %p\n", i, pml4[i], pdpt);
575+
if (memory.main_memory_writes) {
576+
unlock_identity_mapped_entry(pml4[i]);
577+
} else {
578+
clone_and_update_entry(memory, pml4[i], pdpt, PDE64_RW);
579+
CLPRINT("-> Cloning a PML4 entry %lu: 0x%lX at %p\n", i, pml4[i], pdpt);
580+
}
577581
assert(!is_copy_on_write(pml4[i]) && (pml4[i] & PDE64_PRESENT));
578582
}
579583
const uint64_t j = index_from_pdpt_entry(addr);
@@ -582,8 +586,12 @@ char * writable_page_at(vMemory& memory, uint64_t addr, uint64_t verify_flags, b
582586
auto* pd = memory.page_at(pd_mem);
583587
/* Make copy of page if needed */
584588
if (is_copy_on_write(pdpt[j])) {
585-
clone_and_update_entry(memory, pdpt[j], pd, PDE64_RW);
586-
CLPRINT("-> Cloning a PDPT entry: 0x%lX\n", pdpt[j]);
589+
if (memory.main_memory_writes) {
590+
unlock_identity_mapped_entry(pdpt[j]);
591+
} else {
592+
clone_and_update_entry(memory, pdpt[j], pd, PDE64_RW);
593+
CLPRINT("-> Cloning a PDPT entry: 0x%lX\n", pdpt[j]);
594+
}
587595
}
588596
const uint64_t k = index_from_pd_entry(addr);
589597
if (pd[k] & PDE64_PRESENT) {
@@ -594,7 +602,13 @@ char * writable_page_at(vMemory& memory, uint64_t addr, uint64_t verify_flags, b
594602
/* Copy-on-write 2MB page */
595603

596604
/* NOTE: Make sure we are re-reading pd[k] */
597-
if (memory.split_hugepages && (pd[k] & PDE64_PS)) { // 2MB page
605+
if (memory.main_memory_writes) {
606+
unlock_identity_mapped_entry(pd[k]);
607+
if (pd[k] & PDE64_PS) {
608+
memory.increment_unlocked_pages(512);
609+
}
610+
goto entry_is_no_longer_copy_on_write;
611+
} else if (memory.split_hugepages && (pd[k] & PDE64_PS)) { // 2MB page
598612
CLPRINT("-> Splitting a 2MB page, addr=0x%lX rw=%lu cloneable=%lu\n",
599613
addr, pd[k] & PDE64_RW, pd[k] & PDE64_CLONEABLE);
600614
/* Remove PS flag */
@@ -656,6 +670,7 @@ char * writable_page_at(vMemory& memory, uint64_t addr, uint64_t verify_flags, b
656670
CLPRINT("-> Cloning a PD entry: 0x%lX\n", pd[k]);
657671
}
658672

673+
entry_is_no_longer_copy_on_write:
659674
if (pd[k] & PDE64_PS) { // 2MB page
660675
if (UNLIKELY((pd[k] & verify_flags) != verify_flags)) {
661676
memory_exception("page_at: pt entry not user writable", addr, pd[k]);
@@ -673,6 +688,7 @@ char * writable_page_at(vMemory& memory, uint64_t addr, uint64_t verify_flags, b
673688
if (is_copy_on_write(pt[e])) {
674689
if (memory.is_forkable_master() && memory.main_memory_writes) {
675690
unlock_identity_mapped_entry(pt[e]);
691+
memory.increment_unlocked_pages(1);
676692
} else if (write_zeroes || (pt[e] & PDE64_DIRTY) == 0x0) {
677693
zero_and_update_entry(memory, pt[e], data, PDE64_RW);
678694
} else {

lib/tinykvm/common.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ namespace tinykvm
3737
uint32_t reset_free_work_mem = 0; /* reset_to() */
3838
uint64_t vmem_base_address = 0;
3939
std::string_view binary = {};
40-
std::vector<VirtualRemapping> remappings;
40+
std::vector<VirtualRemapping> remappings {};
4141

4242
bool verbose_loader = false;
4343
bool short_lived = false;
@@ -46,7 +46,7 @@ namespace tinykvm
4646
/* When enabled, master VMs will write directly
4747
to their own main memory instead of memory banks,
4848
allowing forks to immediately see changes. */
49-
bool master_direct_memory_writes = true;
49+
bool master_direct_memory_writes = false;
5050
/* When enabled, split hugepages during page faults. */
5151
bool split_hugepages = false;
5252
/* When enabled, reset_to() will accept a different

lib/tinykvm/memory.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "machine.hpp"
22
#include <cstring>
33
#include <sys/mman.h>
4+
#include <stdexcept>
45
#include <string>
56
#include <unistd.h>
67
#include <unordered_set>
@@ -285,11 +286,7 @@ size_t Machine::banked_memory_pages() const noexcept
285286
}
286287
size_t Machine::banked_memory_capacity_pages() const noexcept
287288
{
288-
size_t count = 0;
289-
for (const auto& bank : memory.banks) {
290-
count += bank.n_pages;
291-
}
292-
return count;
289+
return memory.banks.max_pages();
293290
}
294291

295292
__attribute__((cold, noreturn))
@@ -298,6 +295,18 @@ void vMemory::memory_exception(const char* msg, uint64_t addr, uint64_t size)
298295
throw MemoryException(msg, addr, size);
299296
}
300297

/* Account for `pages` newly unlocked pages.
   When main_memory_writes is set, `pages` is added to the unlocked_pages
   counter and the new total is compared against banks.max_pages();
   going over that limit raises memory_exception("Out of working memory").
   When main_memory_writes is not set, the call itself is treated as an
   error and memory_exception is raised unconditionally.
   NOTE: the counter is updated before the limit check, so it keeps the
   incremented value when the exception is raised. */
298+
void vMemory::increment_unlocked_pages(size_t pages)
299+
{
300+
if (this->main_memory_writes) {
301+
this->unlocked_pages += pages;
302+
if (this->unlocked_pages > this->banks.max_pages()) {
/* The exception's address argument carries the unlocked byte count,
   and its size argument carries the limit in bytes. */
303+
memory_exception("Out of working memory",
304+
this->unlocked_pages * PAGE_SIZE, this->banks.max_pages() * PAGE_SIZE);
305+
}
306+
} else {
/* Reported with address 0 and the requested page count as the size. */
307+
memory_exception("Memory::increment_unlocked_pages() without direct main memory writes enabled", 0, pages);
308+
}
309+
}
301310

302311
uint64_t vMemory::expectedUsermodeFlags() const noexcept
303312
{

lib/tinykvm/memory.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ struct vMemory {
2222
/* Optional executable memory range */
2323
uint64_t vmem_exec_begin = 0;
2424
uint64_t vmem_exec_end = 0;
25+
/* Counter for the number of pages that have been unlocked
26+
in the main memory. */
27+
size_t unlocked_pages = 0;
2528
/* Linear memory */
2629
char* ptr;
2730
size_t size;
@@ -61,6 +64,14 @@ struct vMemory {
6164
MemoryBank::Page new_hugepage();
6265

6366
bool compare(const vMemory& other);
67+
/* When a main VM has direct memory writes enabled, it can
68+
write directly to its own memory, but in order to constrain
69+
the memory usage, we need to keep track of the number of
70+
pages that have been unlocked. */
71+
void increment_unlocked_pages(size_t pages);
/* Returns the current value of the unlocked_pages counter. */
72+
size_t unlocked_memory_pages() const noexcept {
73+
return unlocked_pages;
74+
}
6475

6576
VirtualMem vmem() const;
6677

lib/tinykvm/memory_bank.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ static constexpr bool MADVISE_NOT_DELETE = true;
1414

1515
MemoryBanks::MemoryBanks(Machine& machine, const MachineOptions& options)
1616
: m_machine { machine },
17-
m_arena_begin { 0x7000000000 },
17+
m_arena_begin { ARENA_BASE_ADDRESS },
1818
m_arena_next { m_arena_begin },
1919
m_idx_begin { FIRST_BANK_IDX },
2020
m_idx { m_idx_begin },
@@ -27,10 +27,8 @@ void MemoryBanks::set_max_pages(size_t new_max)
2727
this->m_max_pages = new_max;
2828
//fprintf(stderr, "max_pages: %zu/%zu\n", m_mem.size(), new_max);
2929
/* Reserve the maximum number of banks possible.
30-
We have to + 1 to make sure it's rounded up, avoiding
31-
any possible reallocations close to being out of memory.
3230
NOTE: DO NOT modify this! Needs deque behavior. */
33-
m_mem.reserve(m_max_pages / MemoryBank::N_PAGES + 1);
31+
m_mem.reserve((m_max_pages + MemoryBank::N_PAGES-1) / MemoryBank::N_PAGES);
3432
}
3533

3634
char* MemoryBanks::try_alloc(size_t N)
@@ -81,8 +79,8 @@ MemoryBank& MemoryBanks::get_available_bank(size_t pages)
8179
/* Allocate new memory bank if we are not maxing out memory */
8280
if (m_num_pages < m_max_pages) {
8381
if constexpr (VERBOSE_MEMORY_BANK) {
84-
printf("Allocating new bank at 0x%lX with total pages %u\n",
85-
m_arena_next, m_num_pages + MemoryBank::N_PAGES);
82+
printf("Allocating new bank at 0x%lX with total pages %u/%u\n",
83+
m_arena_next, m_num_pages + MemoryBank::N_PAGES, m_max_pages);
8684
}
8785
auto& bank = this->allocate_new_bank(m_arena_next);
8886
m_num_pages += bank.n_pages;

lib/tinykvm/memory_bank.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,14 @@ struct MemoryBank {
5050

5151
struct MemoryBanks {
5252
static constexpr unsigned FIRST_BANK_IDX = 2;
53+
static constexpr uint64_t ARENA_BASE_ADDRESS = 0x7000000000;
5354

5455
MemoryBanks(Machine&, const MachineOptions&);
5556

5657
MemoryBank& get_available_bank(size_t n_pages);
5758
void reset(const MachineOptions&);
5859
void set_max_pages(size_t new_max);
/* Returns m_max_pages, the page limit assigned by set_max_pages(). */
60+
size_t max_pages() const noexcept { return m_max_pages; }
5961

6062
bool using_hugepages() const noexcept { return m_using_hugepages; }
6163

lib/tinykvm/vcpu.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -377,14 +377,12 @@ void vCPU::set_vcpu_table_at(unsigned index, int value)
377377

378378
void Machine::prepare_copy_on_write(size_t max_work_mem, uint64_t shared_memory_boundary)
379379
{
380-
assert(this->m_prepped == false);
381380
this->m_prepped = true;
382381
/* Make each writable page read-only, causing page fault.
383382
any page after the @shared_memory_boundary is untouched,
384383
effectively turning it into a shared memory area for all. */
385384
if (shared_memory_boundary == 0)
386385
shared_memory_boundary = UINT64_MAX;
387-
foreach_page_makecow(this->memory, kernel_end_address(), shared_memory_boundary);
388386

389387
// Visualizing the page tables after makecow should show that all
390388
// relevant user-writable pages have been made read-only and cloneable
@@ -395,10 +393,27 @@ void Machine::prepare_copy_on_write(size_t max_work_mem, uint64_t shared_memory_
395393
memory.banks.set_max_pages(max_work_mem / PAGE_SIZE);
396394
/* Without working memory we will not be able to make
397395
this master VM usable after prepare_copy_on_write. */
398-
if (max_work_mem == 0)
396+
if (max_work_mem == 0) {
397+
/* If there are previously banked pages, we need to
398+
flatten them into the main memory. */
399+
/// XXX: Implement memory flattening
400+
memory.page_tables = memory.physbase + PT_ADDR;
401+
struct kvm_sregs sregs = this->get_special_registers();
402+
403+
/* Page table entry will be cloned at the start */
404+
sregs.cr3 = memory.page_tables;
405+
sregs.cr0 |= CR0_WP;
406+
407+
vcpu.set_special_registers(sregs);
408+
this->enter_usermode();
409+
410+
foreach_page_makecow(this->memory, kernel_end_address(), shared_memory_boundary);
399411
return;
412+
}
413+
400414
/* This call makes this VM usable after making every page in the
401415
page tables read-only, enabling memory through page faults. */
416+
foreach_page_makecow(this->memory, kernel_end_address(), shared_memory_boundary);
402417
this->setup_cow_mode(this);
403418
}
404419
void Machine::setup_cow_mode(const Machine* other)

tests/unit/fork.cpp

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ extern void prints_hello_world() {
4242

4343
// Make machine forkable (no working memory)
4444
machine.prepare_copy_on_write(65536);
45-
REQUIRE(machine.banked_memory_pages() == 4);
45+
REQUIRE(machine.banked_memory_pages() == 5);
4646
REQUIRE(machine.is_forkable());
4747
REQUIRE(!machine.is_forked());
4848

@@ -249,7 +249,7 @@ extern int get_value() {
249249

250250
// Make machine forkable (with working memory)
251251
machine.prepare_copy_on_write(65536);
252-
REQUIRE(machine.banked_memory_pages() == 4);
252+
REQUIRE(machine.banked_memory_pages() == 5);
253253
REQUIRE(machine.is_forkable());
254254
REQUIRE(!machine.is_forked());
255255

@@ -293,10 +293,10 @@ extern int get_value() {
293293

294294
// Value now starts at 1 due to the change in main VM
295295
fork1.timed_vmcall(funcaddr, 4.0f);
296-
REQUIRE(fork1.return_value() == 2);
296+
REQUIRE(fork1.return_value() == 1);
297297

298298
fork2.timed_vmcall(funcaddr, 4.0f);
299-
REQUIRE(fork2.return_value() == 2);
299+
REQUIRE(fork2.return_value() == 1);
300300
}
301301

302302
TEST_CASE("Fork sanity checks w/crashes", "[Fork]")
@@ -363,7 +363,9 @@ extern void crash() {
363363
TEST_CASE("Fork and run main()", "[Fork]")
364364
{
365365
const auto binary = build_and_load(R"M(
366+
#include <stdio.h>
366367
int main() {
368+
printf("Hello World!\n");
367369
return 666;
368370
}
369371
static unsigned value = 12345;
@@ -378,17 +380,20 @@ int func2() {
378380
}
379381
)M");
380382

381-
tinykvm::Machine machine { binary, { .max_mem = MAX_MEMORY } };
383+
tinykvm::Machine machine { binary, {
384+
.max_mem = MAX_MEMORY,
385+
.master_direct_memory_writes = true
386+
} };
382387

383388
// We need to create a Linux environment for runtimes to work well
384389
machine.setup_linux({"fork"}, env);
385390
REQUIRE(machine.banked_memory_pages() == 0);
386391

387-
// Make machine forkable (with *NO* working memory)
392+
// Make machine forkable (with 4MB working memory)
388393
machine.prepare_copy_on_write(4ULL << 20);
394+
REQUIRE(machine.banked_memory_capacity_bytes() == 4ULL << 20);
389395
REQUIRE(machine.is_forkable());
390396
REQUIRE(!machine.is_forked());
391-
REQUIRE(machine.return_value() == 0); // Initial register value
392397

393398
// Run for at most 4 seconds before giving up
394399
machine.run(4.0f);
@@ -397,11 +402,10 @@ int func2() {
397402
// We only gave it 4MB working memory, so lets mmap allocate that and verify
398403
// that if we write more than that, we get an exception thrown
399404
REQUIRE_THROWS([&] () {
400-
const size_t size = 5ULL << 20;
401-
uint64_t addr = machine.mmap_allocate(5ULL << 20);
405+
const size_t size = 8ULL << 20;
406+
uint64_t addr = machine.mmap_allocate(size);
402407
char buffer[4096];
403-
for (int i = 0; i < 4096; i++)
404-
buffer[i] = 'a';
408+
memset(buffer, 'a', sizeof(buffer));
405409
for (size_t i = 0; i < size; i += 4096)
406410
{
407411
machine.copy_to_guest(addr + i, buffer, 4096);
@@ -411,11 +415,33 @@ int func2() {
411415
}());
412416

413417
// There are banked pages now
414-
const auto banked_pages_before = machine.banked_memory_pages();
418+
const auto banked_pages_before = machine.main_memory().unlocked_memory_pages();
415419
REQUIRE(banked_pages_before > 500);
416420

421+
// We have no free memory now, so make another VM
422+
tinykvm::Machine machine2 { binary, {
423+
.max_mem = MAX_MEMORY,
424+
.master_direct_memory_writes = true
425+
} };
426+
427+
// We need to create a Linux environment for runtimes to work well
428+
machine2.setup_linux({"fork"}, env);
429+
REQUIRE(machine2.banked_memory_pages() == 0);
430+
431+
// Make machine forkable (with 4MB working memory)
432+
machine2.prepare_copy_on_write(4ULL << 20);
433+
REQUIRE(machine2.banked_memory_capacity_bytes() == 4ULL << 20);
434+
REQUIRE(machine2.is_forkable());
435+
REQUIRE(!machine2.is_forked());
436+
437+
// Run for at most 4 seconds before giving up
438+
machine2.run(4.0f);
439+
REQUIRE(machine2.return_value() == 666); // Main() return value
440+
441+
machine2.prepare_copy_on_write(0);
442+
417443
// Create fork
418-
auto fork1 = tinykvm::Machine { machine, {
444+
auto fork1 = tinykvm::Machine { machine2, {
419445
.max_mem = MAX_MEMORY, .max_cow_mem = MAX_COWMEM
420446
} };
421447
REQUIRE(fork1.return_value() == 666); // Main() return value
@@ -425,25 +451,17 @@ int func2() {
425451

426452
fork1.vmcall("func2");
427453
REQUIRE(fork1.return_value() == 54321);
428-
429-
// This is problematic, but we will try to fix this later
430-
// Forked VM is supposed to diverge from the main VM, regardless of mode
431-
machine.vmcall("set_value", 99999);
432-
fork1.vmcall("func1");
433-
REQUIRE(fork1.return_value() == 99999);
434-
435-
REQUIRE(machine.banked_memory_pages() == banked_pages_before);
436454
REQUIRE(fork1.banked_memory_pages() > 0);
437455

438456
for (int i = 0; i < 20; i++)
439457
{
440-
fork1.reset_to(machine, {
458+
fork1.reset_to(machine2, {
441459
.max_mem = MAX_MEMORY,
442460
.max_cow_mem = MAX_COWMEM,
443461
});
444462

445463
fork1.vmcall("func1");
446-
REQUIRE(fork1.return_value() == 99999);
464+
REQUIRE(fork1.return_value() == 12345);
447465

448466
fork1.vmcall("func2");
449467
REQUIRE(fork1.return_value() == 54321);

0 commit comments

Comments
 (0)