Skip to content

Commit 71a36e1

Browse files
committed
refactor(balloon): do not madvise(MADV_DONTNEED) when resuming from file
This operation is completely useless after we mmap-ed anonymous private zero pages on top of the file mapping. Also, add a TODO for memfd-backed memory, as it's currently not supported using madvise. Signed-off-by: Riccardo Mancini <[email protected]>
1 parent 438370b commit 71a36e1

File tree

1 file changed

+45
-36
lines changed

1 file changed

+45
-36
lines changed

src/vmm/src/vstate/memory.rs

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -108,43 +108,52 @@ impl GuestMemoryRegionExt {
108108
) -> Result<usize, GuestMemoryError> {
109109
let phys_address = self.get_host_address(caddr)?;
110110

111-
// If and only if we are resuming from a snapshot file, we have a file and it's mapped
112-
// private
113-
if self.inner().file_offset().is_some() && self.inner().flags() & libc::MAP_PRIVATE != 0 {
114-
// Mmap a new anonymous region over the present one in order to create a hole
115-
// with zero pages.
116-
// This workaround is (only) needed after resuming from a snapshot file because the
117-
// guest memory is mmaped from file as private. In this case, MADV_DONTNEED on the
118-
// file only drops any anonymous pages in range, but subsequent accesses would read
119-
// whatever page is stored on the backing file. Mmapping anonymous pages ensures it's
120-
// zeroed.
121-
// SAFETY: The address and length are known to be valid.
122-
let ret = unsafe {
123-
libc::mmap(
124-
phys_address.cast(),
125-
len,
126-
libc::PROT_READ | libc::PROT_WRITE,
127-
libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
128-
-1,
129-
0,
130-
)
131-
};
132-
if ret == libc::MAP_FAILED {
133-
let os_error = std::io::Error::last_os_error();
134-
error!("discard_range: mmap failed: {:?}", os_error);
135-
return Err(GuestMemoryError::IOError(os_error));
111+
match (self.inner().file_offset(), self.inner().flags()) {
112+
// If and only if we are resuming from a snapshot file, we have a file and it's mapped
113+
// private
114+
(Some(_), flags) if flags & libc::MAP_PRIVATE != 0 => {
115+
// Mmap a new anonymous region over the present one in order to create a hole
116+
// with zero pages.
117+
// This workaround is (only) needed after resuming from a snapshot file because the
118+
// guest memory is mmaped from file as private. In this case, MADV_DONTNEED on the
119+
// file only drops any anonymous pages in range, but subsequent accesses would read
120+
// whatever page is stored on the backing file. Mmapping anonymous pages ensures
121+
// it's zeroed.
122+
// SAFETY: The address and length are known to be valid.
123+
let ret = unsafe {
124+
libc::mmap(
125+
phys_address.cast(),
126+
len,
127+
libc::PROT_READ | libc::PROT_WRITE,
128+
libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
129+
-1,
130+
0,
131+
)
132+
};
133+
if ret == libc::MAP_FAILED {
134+
let os_error = std::io::Error::last_os_error();
135+
error!("discard_range: mmap failed: {:?}", os_error);
136+
Err(GuestMemoryError::IOError(os_error))
137+
} else {
138+
Ok(len)
139+
}
140+
}
141+
// TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
142+
// (or in general MAP_SHARED of a fd). In those cases we should use
143+
// fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
144+
// We keep falling to the madvise branch to keep the previous behaviour.
145+
(None, _) | (Some(_), _) => {
146+
// Madvise the region in order to mark it as not used.
147+
// SAFETY: The address and length are known to be valid.
148+
let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
149+
if ret < 0 {
150+
let os_error = std::io::Error::last_os_error();
151+
error!("discard_range: madvise failed: {:?}", os_error);
152+
Err(GuestMemoryError::IOError(os_error))
153+
} else {
154+
Ok(len)
155+
}
136156
}
137-
}
138-
139-
// Madvise the region in order to mark it as not used.
140-
// SAFETY: The address and length are known to be valid.
141-
let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
142-
if ret < 0 {
143-
let os_error = std::io::Error::last_os_error();
144-
error!("discard_range: madvise failed: {:?}", os_error);
145-
Err(GuestMemoryError::IOError(os_error))
146-
} else {
147-
Ok(len)
148157
}
149158
}
150159

0 commit comments

Comments
 (0)