Skip to content

Commit d0bd72c

Browse files
committed
KVM: selftests: Collect *all* dirty entries in each dirty_log_test iteration
Collect all dirty entries during each iteration of dirty_log_test by doing a final collection after the vCPU has been stopped. To deal with KVM's destructive approach to getting the dirty bitmaps, use a second bitmap for the post-stop collection. Collecting all entries that were dirtied during an iteration simplifies the verification logic *and* improves test coverage. - If a page is written during iteration X, but not seen as dirty until X+1, the test can get a false pass if the page is also written during X+1. - If a dirty page used a stale value from a previous iteration, the test would grant a false pass. - If a missed dirty log occurs in the last iteration, the test would fail to detect the issue. E.g. modifying mark_page_dirty_in_slot() to dirty an unwritten gfn: if (memslot && kvm_slot_dirty_track_enabled(memslot)) { unsigned long rel_gfn = gfn - memslot->base_gfn; u32 slot = (memslot->as_id << 16) | memslot->id; if (!vcpu->extra_dirty && gfn_to_memslot(kvm, gfn + 1) == memslot) { vcpu->extra_dirty = true; mark_page_dirty_in_slot(kvm, memslot, gfn + 1); } if (kvm->dirty_ring_size && vcpu) kvm_dirty_ring_push(vcpu, slot, rel_gfn); else if (memslot->dirty_bitmap) set_bit_le(rel_gfn, memslot->dirty_bitmap); } isn't detected with the current approach, even with an interval of 1ms (when running nested in a VM; bare metal would be even *less* likely to detect the bug due to the vCPU being able to dirty more memory). Whereas collecting all dirty entries consistently detects failures with an interval of 700ms or more (the longer interval means a higher probability of an actual write to the prematurely-dirtied page). Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Sean Christopherson <[email protected]>
1 parent 24b9a2a commit d0bd72c

File tree

1 file changed

+45
-104
lines changed

1 file changed

+45
-104
lines changed

tools/testing/selftests/kvm/dirty_log_test.c

Lines changed: 45 additions & 104 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,6 @@ static uint64_t host_num_pages;
134134
/* For statistics only */
135135
static uint64_t host_dirty_count;
136136
static uint64_t host_clear_count;
137-
static uint64_t host_track_next_count;
138137

139138
/* Whether dirty ring reset is requested, or finished */
140139
static sem_t sem_vcpu_stop;
@@ -422,15 +421,6 @@ struct log_mode {
422421
},
423422
};
424423

425-
/*
426-
* We use this bitmap to track some pages that should have its dirty
427-
* bit set in the _next_ iteration. For example, if we detected the
428-
* page value changed to current iteration but at the same time the
429-
* page bit is cleared in the latest bitmap, then the system must
430-
* report that write in the next get dirty log call.
431-
*/
432-
static unsigned long *host_bmap_track;
433-
434424
static void log_modes_dump(void)
435425
{
436426
int i;
@@ -491,79 +481,52 @@ static void *vcpu_worker(void *data)
491481
return NULL;
492482
}
493483

494-
static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
484+
static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
495485
{
496486
uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0;
497487
uint64_t step = vm_num_host_pages(mode, 1);
498-
uint64_t min_iter = 0;
499488

500489
for (page = 0; page < host_num_pages; page += step) {
501490
uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size);
491+
bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
502492

503-
/* If this is a special page that we were tracking... */
504-
if (__test_and_clear_bit_le(page, host_bmap_track)) {
505-
host_track_next_count++;
506-
TEST_ASSERT(test_bit_le(page, bmap),
507-
"Page %"PRIu64" should have its dirty bit "
508-
"set in this iteration but it is missing",
509-
page);
510-
}
511-
512-
if (__test_and_clear_bit_le(page, bmap)) {
493+
/*
494+
* Ensure both bitmaps are cleared, as a page can be written
495+
* multiple times per iteration, i.e. can show up in both
496+
* bitmaps, and the dirty ring is additive, i.e. doesn't purge
497+
* bitmap entries from previous collections.
498+
*/
499+
if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
513500
nr_dirty_pages++;
514501

515502
/*
516-
* If the bit is set, the value written onto
517-
* the corresponding page should be either the
518-
* previous iteration number or the current one.
503+
* If the page is dirty, the value written to memory
504+
* should be the current iteration number.
519505
*/
520-
if (val == iteration || val == iteration - 1)
506+
if (val == iteration)
521507
continue;
522508

523509
if (host_log_mode == LOG_MODE_DIRTY_RING) {
524-
if (val == iteration - 2 && min_iter <= iteration - 2) {
525-
/*
526-
* Short answer: this case is special
527-
* only for dirty ring test where the
528-
* page is the last page before a kvm
529-
* dirty ring full in iteration N-2.
530-
*
531-
* Long answer: Assuming ring size R,
532-
* one possible condition is:
533-
*
534-
* main thr vcpu thr
535-
* -------- --------
536-
* iter=1
537-
* write 1 to page 0~(R-1)
538-
* full, vmexit
539-
* collect 0~(R-1)
540-
* kick vcpu
541-
* write 1 to (R-1)~(2R-2)
542-
* full, vmexit
543-
* iter=2
544-
* collect (R-1)~(2R-2)
545-
* kick vcpu
546-
* write 1 to (2R-2)
547-
* (NOTE!!! "1" cached in cpu reg)
548-
* write 2 to (2R-1)~(3R-3)
549-
* full, vmexit
550-
* iter=3
551-
* collect (2R-2)~(3R-3)
552-
* (here if we read value on page
553-
* "2R-2" is 1, while iter=3!!!)
554-
*
555-
* This however can only happen once per iteration.
556-
*/
557-
min_iter = iteration - 1;
558-
continue;
559-
} else if (page == dirty_ring_last_page ||
560-
page == dirty_ring_prev_iteration_last_page) {
561-
/*
562-
* Please refer to comments in
563-
* dirty_ring_last_page.
564-
*/
510+
/*
511+
* The last page in the ring from this iteration
512+
* or the previous can be written with the value
513+
* from the previous iteration (relative to the
514+
* last page's iteration), as the value to be
515+
* written may be cached in a CPU register.
516+
*/
517+
if (page == dirty_ring_last_page ||
518+
page == dirty_ring_prev_iteration_last_page)
565519
continue;
566-
}
520+
} else if (!val && iteration == 1 && bmap0_dirty) {
521+
/*
522+
* When testing get+clear, the dirty bitmap
523+
* starts with all bits set, and so the first
524+
* iteration can observe a "dirty" page that
525+
* was never written, but only in the first
526+
* bitmap (collecting the bitmap also clears
527+
* all dirty pages).
528+
*/
529+
continue;
567530
}
568531

569532
TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
@@ -574,36 +537,13 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
574537
nr_clean_pages++;
575538
/*
576539
* If cleared, the value written can be any
577-
* value smaller or equals to the iteration
578-
* number. Note that the value can be exactly
579-
* (iteration-1) if that write can happen
580-
* like this:
581-
*
582-
* (1) increase loop count to "iteration-1"
583-
* (2) write to page P happens (with value
584-
* "iteration-1")
585-
* (3) get dirty log for "iteration-1"; we'll
586-
* see that page P bit is set (dirtied),
587-
* and not set the bit in host_bmap_track
588-
* (4) increase loop count to "iteration"
589-
* (which is current iteration)
590-
* (5) get dirty log for current iteration,
591-
* we'll see that page P is cleared, with
592-
* value "iteration-1".
540+
* value smaller than the iteration number.
593541
*/
594-
TEST_ASSERT(val <= iteration,
595-
"Clear page %lu value (%lu) > iteration (%lu) "
542+
TEST_ASSERT(val < iteration,
543+
"Clear page %lu value (%lu) >= iteration (%lu) "
596544
"(last = %lu, prev_last = %lu)",
597545
page, val, iteration, dirty_ring_last_page,
598546
dirty_ring_prev_iteration_last_page);
599-
if (val == iteration) {
600-
/*
601-
* This page is _just_ modified; it
602-
* should report its dirtyness in the
603-
* next run
604-
*/
605-
__set_bit_le(page, host_bmap_track);
606-
}
607547
}
608548
}
609549

@@ -639,7 +579,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
639579
struct test_params *p = arg;
640580
struct kvm_vcpu *vcpu;
641581
struct kvm_vm *vm;
642-
unsigned long *bmap;
582+
unsigned long *bmap[2];
643583
uint32_t ring_buf_idx = 0;
644584
int sem_val;
645585

@@ -695,8 +635,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
695635

696636
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
697637

698-
bmap = bitmap_zalloc(host_num_pages);
699-
host_bmap_track = bitmap_zalloc(host_num_pages);
638+
bmap[0] = bitmap_zalloc(host_num_pages);
639+
bmap[1] = bitmap_zalloc(host_num_pages);
700640

701641
/* Add an extra memory slot for testing dirty logging */
702642
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -723,7 +663,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
723663
WRITE_ONCE(host_quit, false);
724664
host_dirty_count = 0;
725665
host_clear_count = 0;
726-
host_track_next_count = 0;
727666
WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
728667

729668
/*
@@ -774,7 +713,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
774713
continue;
775714

776715
log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
777-
bmap, host_num_pages,
716+
bmap[0], host_num_pages,
778717
&ring_buf_idx);
779718
}
780719

@@ -804,6 +743,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
804743
* the flush of the last page, and since we handle the last
805744
* page specially verification will succeed anyway.
806745
*/
746+
log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
747+
bmap[1], host_num_pages,
748+
&ring_buf_idx);
807749
vm_dirty_log_verify(mode, bmap);
808750

809751
/*
@@ -824,12 +766,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
824766

825767
pthread_join(vcpu_thread, NULL);
826768

827-
pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
828-
"track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
829-
host_track_next_count);
769+
pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
770+
host_dirty_count, host_clear_count);
830771

831-
free(bmap);
832-
free(host_bmap_track);
772+
free(bmap[0]);
773+
free(bmap[1]);
833774
kvm_vm_free(vm);
834775
}
835776

0 commit comments

Comments (0)