Skip to content

Commit e5420d2

Browse files
committed
block: advisory locks: use byte-range locks to match QEMU behavior
Switch from a whole-file lock (0, 0 /* EOF */) to a byte-range lock covering (0, disk.len()), i.e., enabling control over the lock's granularity. The granularity has significant implications in typical Cloud deployments where disk image management software (e.g., Cinder as part of OpenStack) may treat advisory locks for the whole file as mandatory locks. This however prevents these software components from snapshotting disk images while VMs are running. Therefore, it is a valid use case to lock the whole byte range of a disk image without technically locking the whole file - to get the best of both worlds. This also brings CHVs behavior in line with QEMU. Whole-file locks remain a valid use case and could be supported later. This patch only provides the necessary groundwork; making it configurable is out of scope for now. Signed-off-by: Philipp Schuster <philipp.schuster@cyberus-technology.de> On-behalf-of: SAP philipp.schuster@sap.com
1 parent 1498fee commit e5420d2

File tree

2 files changed

+65
-13
lines changed

2 files changed

+65
-13
lines changed

block/src/fcntl.rs

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,44 @@ impl LockState {
101101
}
102102
}
103103

104+
/// The granularity of the advisory lock.
105+
///
106+
/// The granularity has significant implications in typical Cloud deployments
107+
/// where disk image management software (e.g., Cinder as part of OpenStack)
108+
/// may treat advisory locks for the whole file as mandatory locks. This
109+
/// however prevents these software components from snapshotting disk images
110+
/// while VMs are running. Therefore, it is a valid use case to lock the whole
111+
/// byte range of a disk image without technically locking the whole file -
112+
/// to get the best of both worlds.
113+
#[derive(Clone, Copy, Debug)]
114+
pub enum LockGranularity {
115+
WholeFile,
116+
ByteRange(u64 /* from, inclusive */, u64 /* len */),
117+
}
118+
119+
impl LockGranularity {
120+
const fn l_start(self) -> u64 {
121+
match self {
122+
LockGranularity::WholeFile => 0,
123+
LockGranularity::ByteRange(start, _) => start,
124+
}
125+
}
126+
127+
const fn l_len(self) -> u64 {
128+
match self {
129+
LockGranularity::WholeFile => 0, /* EOF */
130+
LockGranularity::ByteRange(_, len) => len,
131+
}
132+
}
133+
}
134+
104135
/// Returns a [`struct@libc::flock`] structure for the whole file.
105-
const fn get_flock(lock_type: LockType) -> libc::flock {
136+
const fn get_flock(lock_type: LockType, granularity: LockGranularity) -> libc::flock {
106137
libc::flock {
107138
l_type: lock_type.to_libc_val() as libc::c_short,
108139
l_whence: libc::SEEK_SET as libc::c_short,
109-
l_start: 0,
110-
l_len: 0, /* EOF */
140+
l_start: granularity.l_start() as libc::c_long,
141+
l_len: granularity.l_len() as libc::c_long,
111142
l_pid: 0, /* filled by callee */
112143
}
113144
}
@@ -122,8 +153,13 @@ const fn get_flock(lock_type: LockType) -> libc::flock {
122153
/// - `file`: The file to acquire a lock for [`LockType`]. The file's state will
123154
/// be logically mutated, but not technically.
124155
/// - `lock_type`: The [`LockType`]
125-
pub fn try_acquire_lock<Fd: AsRawFd>(file: Fd, lock_type: LockType) -> Result<(), LockError> {
126-
let flock = get_flock(lock_type);
156+
/// - `granularity`: The [`LockGranularity`].
157+
pub fn try_acquire_lock<Fd: AsRawFd>(
158+
file: Fd,
159+
lock_type: LockType,
160+
granularity: LockGranularity,
161+
) -> Result<(), LockError> {
162+
let flock = get_flock(lock_type, granularity);
127163

128164
let res = fcntl(file.as_raw_fd(), FcntlArg::F_OFD_SETLK(&flock));
129165
match res {
@@ -146,17 +182,22 @@ pub fn try_acquire_lock<Fd: AsRawFd>(file: Fd, lock_type: LockType) -> Result<()
146182
///
147183
/// # Parameters
148184
/// - `file`: The file to clear all locks for [`LockType`].
149-
pub fn clear_lock<Fd: AsRawFd>(file: Fd) -> Result<(), LockError> {
150-
try_acquire_lock(file, LockType::Unlock)
185+
/// - `granularity`: The [`LockGranularity`].
186+
pub fn clear_lock<Fd: AsRawFd>(file: Fd, granularity: LockGranularity) -> Result<(), LockError> {
187+
try_acquire_lock(file, LockType::Unlock, granularity)
151188
}
152189

153190
/// Returns the current lock state using [`fcntl`] with respect to the given
154191
/// parameters.
155192
///
156193
/// # Parameters
157194
/// - `file`: The file for which to get the lock state.
158-
pub fn get_lock_state<Fd: AsRawFd>(file: Fd) -> Result<LockState, LockError> {
159-
let mut flock = get_flock(LockType::Write);
195+
/// - `granularity`: The [`LockGranularity`].
196+
pub fn get_lock_state<Fd: AsRawFd>(
197+
file: Fd,
198+
granularity: LockGranularity,
199+
) -> Result<LockState, LockError> {
200+
let mut flock = get_flock(LockType::Write, granularity);
160201
let res = fcntl(file.as_raw_fd(), FcntlArg::F_OFD_GETLK(&mut flock));
161202
match res {
162203
0 => {

virtio-devices/src/block.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use std::{io, result};
1919

2020
use anyhow::anyhow;
2121
use block::async_io::{AsyncIo, AsyncIoError, DiskFile};
22-
use block::fcntl::{LockError, LockType, get_lock_state};
22+
use block::fcntl::{LockError, LockGranularity, LockType, get_lock_state};
2323
use block::{
2424
ExecuteAsync, ExecuteError, Request, RequestType, VirtioBlockConfig, build_serial, fcntl,
2525
};
@@ -789,9 +789,14 @@ impl Block {
789789
self.id,
790790
self.disk_path.display()
791791
);
792+
let granularity = self
793+
.disk_image
794+
.size()
795+
.map(|size| LockGranularity::ByteRange(0, size))
796+
.unwrap_or(LockGranularity::WholeFile);
792797
let fd = self.disk_image.fd();
793-
fcntl::try_acquire_lock(fd, lock_type).map_err(|error| {
794-
let current_lock = get_lock_state(fd);
798+
fcntl::try_acquire_lock(fd, lock_type, granularity).map_err(|error| {
799+
let current_lock = get_lock_state(fd, granularity);
795800
// Don't propagate the error to the outside, as it is not useful at all. Instead,
796801
// we try to log additional help to the user.
797802
if let Ok(current_lock) = current_lock {
@@ -815,10 +820,16 @@ impl Block {
815820

816821
/// Releases the advisory lock held for the corresponding disk image.
817822
pub fn unlock_image(&mut self) -> Result<()> {
823+
let granularity = self
824+
.disk_image
825+
.size()
826+
.map(|size| LockGranularity::ByteRange(0, size))
827+
.unwrap_or(LockGranularity::WholeFile);
828+
818829
// It is very unlikely that this fails;
819830
// Should we remove the Result to simplify the error propagation on
820831
// higher levels?
821-
fcntl::clear_lock(self.disk_image.fd()).map_err(|error| Error::LockDiskImage {
832+
fcntl::clear_lock(self.disk_image.fd(), granularity).map_err(|error| Error::LockDiskImage {
822833
path: self.disk_path.clone(),
823834
error,
824835
lock_type: LockType::Unlock,

0 commit comments

Comments
 (0)