Skip to content

Commit 97538ee

Browse files
committed
block: advisory locks: use byte-range locks to match QEMU behavior
Switch from a whole-file lock (0, 0 /* EOF */) to a byte-range lock covering (0, disk.len()). This avoids issues in large cloud deployments where Linux and management software may translate OFD locks into mandatory locks on certain network filesystems. With this change, management software can safely snapshot disk images while a VM is running. Other Cloud Hypervisor instances will still be prevented from using those images concurrently, as CHV continues to enforce advisory lock checks. This also brings our behavior in line with QEMU. Whole-file locks remain a valid use case and could be supported later. This patch only provides the necessary groundwork; making it configurable is out of scope for now. Signed-off-by: Philipp Schuster <philipp.schuster@cyberus-technology.de> On-behalf-of: SAP philipp.schuster@sap.com
1 parent 1498fee commit 97538ee

File tree

2 files changed

+63
-13
lines changed

2 files changed

+63
-13
lines changed

block/src/fcntl.rs

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,42 @@ impl LockState {
101101
}
102102
}
103103

104+
/// The granularity of the advisory lock.
105+
///
106+
/// The granularity has significant implications in larger Cloud deployments,
107+
/// especially NFS. An advisory lock for the whole file will be translated by
108+
/// Linux into a mandatory lock in NFS volumes. This however is not desirable
109+
/// in some cases, e.g., to snapshot disk images in the management layer.
110+
/// In these cases, users may prefer a `(0, disk.len())` byte range lock.
111+
#[derive(Clone, Copy, Debug)]
112+
pub enum LockGranularity {
113+
WholeFile,
114+
ByteRange(u64 /* from, inclusive */, u64 /* len */),
115+
}
116+
117+
impl LockGranularity {
118+
const fn l_start(self) -> u64 {
119+
match self {
120+
LockGranularity::WholeFile => 0,
121+
LockGranularity::ByteRange(start, _) => start,
122+
}
123+
}
124+
125+
const fn l_len(self) -> u64 {
126+
match self {
127+
LockGranularity::WholeFile => 0,
128+
LockGranularity::ByteRange(_, len) => len,
129+
}
130+
}
131+
}
132+
104133
/// Returns a [`struct@libc::flock`] structure for the whole file.
105-
const fn get_flock(lock_type: LockType) -> libc::flock {
134+
const fn get_flock(lock_type: LockType, granularity: LockGranularity) -> libc::flock {
106135
libc::flock {
107136
l_type: lock_type.to_libc_val() as libc::c_short,
108137
l_whence: libc::SEEK_SET as libc::c_short,
109-
l_start: 0,
110-
l_len: 0, /* EOF */
138+
l_start: granularity.l_start() as libc::c_long,
139+
l_len: granularity.l_len() as libc::c_long,
111140
l_pid: 0, /* filled by callee */
112141
}
113142
}
@@ -122,8 +151,13 @@ const fn get_flock(lock_type: LockType) -> libc::flock {
122151
/// - `file`: The file to acquire a lock for [`LockType`]. The file's state will
123152
/// be logically mutated, but not technically.
124153
/// - `lock_type`: The [`LockType`]
125-
pub fn try_acquire_lock<Fd: AsRawFd>(file: Fd, lock_type: LockType) -> Result<(), LockError> {
126-
let flock = get_flock(lock_type);
154+
/// - `granularity`: The [`LockGranularity`].
155+
pub fn try_acquire_lock<Fd: AsRawFd>(
156+
file: Fd,
157+
lock_type: LockType,
158+
granularity: LockGranularity,
159+
) -> Result<(), LockError> {
160+
let flock = get_flock(lock_type, granularity);
127161

128162
let res = fcntl(file.as_raw_fd(), FcntlArg::F_OFD_SETLK(&flock));
129163
match res {
@@ -146,17 +180,22 @@ pub fn try_acquire_lock<Fd: AsRawFd>(file: Fd, lock_type: LockType) -> Result<()
146180
///
147181
/// # Parameters
148182
/// - `file`: The file to clear all locks for [`LockType`].
149-
pub fn clear_lock<Fd: AsRawFd>(file: Fd) -> Result<(), LockError> {
150-
try_acquire_lock(file, LockType::Unlock)
183+
/// - `granularity`: The [`LockGranularity`].
184+
pub fn clear_lock<Fd: AsRawFd>(file: Fd, granularity: LockGranularity) -> Result<(), LockError> {
185+
try_acquire_lock(file, LockType::Unlock, granularity)
151186
}
152187

153188
/// Returns the current lock state using [`fcntl`] with respect to the given
154189
/// parameters.
155190
///
156191
/// # Parameters
157192
/// - `file`: The file for which to get the lock state.
158-
pub fn get_lock_state<Fd: AsRawFd>(file: Fd) -> Result<LockState, LockError> {
159-
let mut flock = get_flock(LockType::Write);
193+
/// - `granularity`: The [`LockGranularity`].
194+
pub fn get_lock_state<Fd: AsRawFd>(
195+
file: Fd,
196+
granularity: LockGranularity,
197+
) -> Result<LockState, LockError> {
198+
let mut flock = get_flock(LockType::Write, granularity);
160199
let res = fcntl(file.as_raw_fd(), FcntlArg::F_OFD_GETLK(&mut flock));
161200
match res {
162201
0 => {

virtio-devices/src/block.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use std::{io, result};
1919

2020
use anyhow::anyhow;
2121
use block::async_io::{AsyncIo, AsyncIoError, DiskFile};
22-
use block::fcntl::{LockError, LockType, get_lock_state};
22+
use block::fcntl::{LockError, LockGranularity, LockType, get_lock_state};
2323
use block::{
2424
ExecuteAsync, ExecuteError, Request, RequestType, VirtioBlockConfig, build_serial, fcntl,
2525
};
@@ -789,9 +789,14 @@ impl Block {
789789
self.id,
790790
self.disk_path.display()
791791
);
792+
let granularity = self
793+
.disk_image
794+
.size()
795+
.map(|size| LockGranularity::ByteRange(0, size))
796+
.unwrap_or(LockGranularity::WholeFile);
792797
let fd = self.disk_image.fd();
793-
fcntl::try_acquire_lock(fd, lock_type).map_err(|error| {
794-
let current_lock = get_lock_state(fd);
798+
fcntl::try_acquire_lock(fd, lock_type, granularity).map_err(|error| {
799+
let current_lock = get_lock_state(fd, granularity);
795800
// Don't propagate the error to the outside, as it is not useful at all. Instead,
796801
// we try to log additional help to the user.
797802
if let Ok(current_lock) = current_lock {
@@ -815,10 +820,16 @@ impl Block {
815820

816821
/// Releases the advisory lock held for the corresponding disk image.
817822
pub fn unlock_image(&mut self) -> Result<()> {
823+
let granularity = self
824+
.disk_image
825+
.size()
826+
.map(|size| LockGranularity::ByteRange(0, size))
827+
.unwrap_or(LockGranularity::WholeFile);
828+
818829
// It is very unlikely that this fails;
819830
// Should we remove the Result to simplify the error propagation on
820831
// higher levels?
821-
fcntl::clear_lock(self.disk_image.fd()).map_err(|error| Error::LockDiskImage {
832+
fcntl::clear_lock(self.disk_image.fd(), granularity).map_err(|error| Error::LockDiskImage {
822833
path: self.disk_path.clone(),
823834
error,
824835
lock_type: LockType::Unlock,

0 commit comments

Comments
 (0)