Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/vmm/src/devices/virtio/block/virtio/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::devices::virtio::block::CacheType;
use crate::devices::virtio::block::virtio::metrics::{BlockDeviceMetrics, BlockMetricsPerDevice};
use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
use crate::devices::virtio::generated::virtio_blk::{
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_BLK_ID_BYTES,
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_BLK_ID_BYTES, VIRTIO_BLK_F_DISCARD,
};
use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
Expand Down Expand Up @@ -295,7 +295,8 @@ impl VirtioBlock {
.map_err(VirtioBlockError::RateLimiter)?
.unwrap_or_default();

let mut avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX);
let mut avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX)
| (1u64 << VIRTIO_BLK_F_DISCARD);

if config.cache_type == CacheType::Writeback {
avail_features |= 1u64 << VIRTIO_BLK_F_FLUSH;
Expand Down
2 changes: 1 addition & 1 deletion src/vmm/src/devices/virtio/block/virtio/io/async_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl AsyncFileEngine {
Ok(())
}

#[cfg(test)]

pub fn file(&self) -> &File {
&self.file
}
Expand Down
39 changes: 38 additions & 1 deletion src/vmm/src/devices/virtio/block/virtio/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ pub mod sync_io;

use std::fmt::Debug;
use std::fs::File;
use libc::{c_int, off64_t};
use std::os::unix::io::AsRawFd;

pub use self::async_io::{AsyncFileEngine, AsyncIoError};
pub use self::sync_io::{SyncFileEngine, SyncIoError};
Expand Down Expand Up @@ -33,6 +35,13 @@ pub enum BlockIoError {
Async(AsyncIoError),
}

bitflags::bitflags! {
pub struct FallocateFlags: c_int {
const FALLOC_FL_KEEP_SIZE = 0x01;
const FALLOC_FL_PUNCH_HOLE = 0x02;
}
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

libc crate already has these constants

impl BlockIoError {
pub fn is_throttling_err(&self) -> bool {
match self {
Expand Down Expand Up @@ -75,7 +84,7 @@ impl FileEngine {
Ok(())
}

#[cfg(test)]

pub fn file(&self) -> &File {
match self {
FileEngine::Async(engine) => engine.file(),
Expand Down Expand Up @@ -172,6 +181,34 @@ impl FileEngine {
FileEngine::Sync(engine) => engine.flush().map_err(BlockIoError::Sync),
}
}

pub fn handle_discard(&self, offset: u64, len: u32) -> Result<(), std::io::Error> {
let fd = self.file().as_raw_fd();
let result = Self::fallocate(
fd,
FallocateFlags::FALLOC_FL_PUNCH_HOLE | FallocateFlags::FALLOC_FL_KEEP_SIZE,
offset as i64,
len as i64,
);
if let Err(e) = result {
eprintln!("Discard failed: {}", e);
return Err(std::io::Error::last_os_error());
}
Ok(())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we have 2 backends for block device, each need to implement it's own version of discard. For sync version this impl is basically what it needs to be, for io_uring backend there is IORING_OP_FALLOCATE op.

}

pub fn fallocate(fd: c_int, mode: FallocateFlags, offset: off64_t, len: off64_t) -> Result<(), std::io::Error> {
// need to refer to libc library.
let ret: i32 = unsafe { libc::fallocate64(fd, mode.bits(), offset, len) };
if ret == 0 {
Ok(())
} else {
Err(std::io::Error::last_os_error())
}
}



}

#[cfg(test)]
Expand Down
4 changes: 3 additions & 1 deletion src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub enum SyncIoError {
SyncAll(std::io::Error),
/// Transfer: {0}
Transfer(GuestMemoryError),
/// Discard: {0}
Discard(std::io::Error)
}

#[derive(Debug)]
Expand All @@ -33,7 +35,7 @@ impl SyncFileEngine {
SyncFileEngine { file }
}

#[cfg(test)]

pub fn file(&self) -> &File {
&self.file
}
Expand Down
3 changes: 3 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ pub struct BlockDeviceMetrics {
pub invalid_reqs_count: SharedIncMetric,
/// Number of flushes operation triggered on this block device.
pub flush_count: SharedIncMetric,
/// Number of discard operation triggered on this block device.
pub discard_count: SharedIncMetric,
/// Number of events triggered on the queue of this block device.
pub queue_event_count: SharedIncMetric,
/// Number of events ratelimiter-related.
Expand Down Expand Up @@ -210,6 +212,7 @@ impl BlockDeviceMetrics {
self.invalid_reqs_count
.add(other.invalid_reqs_count.fetch_diff());
self.flush_count.add(other.flush_count.fetch_diff());
self.discard_count.add(other.discard_count.fetch_diff());
self.queue_event_count
.add(other.queue_event_count.fetch_diff());
self.rate_limiter_event_count
Expand Down
8 changes: 8 additions & 0 deletions src/vmm/src/devices/virtio/block/virtio/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,12 @@ pub enum VirtioBlockError {
RateLimiter(std::io::Error),
/// Persistence error: {0}
Persist(crate::devices::virtio::persist::PersistError),
/// Sector overflow in discard segment
SectorOverflow,
/// Discard segment exceeds disk size
BeyondDiskSize,
/// Invalid flags in discard segment
InvalidDiscardFlags,
/// Invalid discard request (e.g., empty segments)
InvalidDiscardRequest,
}
90 changes: 88 additions & 2 deletions src/vmm/src/devices/virtio/block/virtio/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@ use std::convert::From;

use vm_memory::GuestMemoryError;

use super::io::{BlockIoError, SyncIoError};
use super::{SECTOR_SHIFT, SECTOR_SIZE, VirtioBlockError, io as block_io};
use crate::devices::virtio::block::virtio::device::DiskProperties;
use crate::devices::virtio::block::virtio::metrics::BlockDeviceMetrics;
pub use crate::devices::virtio::generated::virtio_blk::{
VIRTIO_BLK_ID_BYTES, VIRTIO_BLK_S_IOERR, VIRTIO_BLK_S_OK, VIRTIO_BLK_S_UNSUPP,
VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT,
VIRTIO_BLK_T_DISCARD
};
use crate::devices::virtio::queue::DescriptorChain;
use crate::logger::{IncMetric, error};
use crate::rate_limiter::{RateLimiter, TokenType};
use crate::vstate::memory::{ByteValued, Bytes, GuestAddress, GuestMemoryMmap};
use crate::vstate::memory::{ByteValued, Bytes, GuestAddress, GuestMemoryMmap, Address};

#[derive(Debug, derive_more::From)]
pub enum IoErr {
Expand All @@ -34,6 +36,7 @@ pub enum RequestType {
Out,
Flush,
GetDeviceID,
Discard,
Unsupported(u32),
}

Expand All @@ -44,6 +47,7 @@ impl From<u32> for RequestType {
VIRTIO_BLK_T_OUT => RequestType::Out,
VIRTIO_BLK_T_FLUSH => RequestType::Flush,
VIRTIO_BLK_T_GET_ID => RequestType::GetDeviceID,
VIRTIO_BLK_T_DISCARD => RequestType::Discard,
t => RequestType::Unsupported(t),
}
}
Expand Down Expand Up @@ -176,6 +180,12 @@ impl PendingRequest {
(Ok(transferred_data_len), RequestType::GetDeviceID) => {
Status::from_data(self.data_len, transferred_data_len, true)
}
(Ok(_), RequestType::Discard) => {
block_metrics.discard_count.inc();
Status::Ok {
num_bytes_to_mem: 0,
}
}
(_, RequestType::Unsupported(op)) => Status::Unsupported { op },
(Err(err), _) => Status::IoErr {
num_bytes_to_mem: 0,
Expand Down Expand Up @@ -231,13 +241,23 @@ impl RequestHeader {
}
}

// For this, please see VirtIO v1.2. This struct mimics that of the implementation in Virtio.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct DiscardSegment {
sector: u64,
num_sectors: u32,
flags: u32,
}
Comment on lines +247 to +251
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flags is not just a 32 bit integer, it is a struct with 2 u32 inside. Also it needs to have a C struct layout attribute.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for delay, was busy with other things but- I thought that flags was a single 32-bit integer, where 1 bit was for unmap, and the other 31 bits are reserved for struct alignment I'm assuming.

For reference, I'm using VirtI/O 1.2 Documentation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, you are right, I totally missed the bitfield syntax in C definition.
Still you need to add the [repr(C)] to the struct definition.

unsafe impl ByteValued for DiscardSegment {}

#[derive(Debug, PartialEq, Eq)]
pub struct Request {
pub r#type: RequestType,
pub data_len: u32,
pub status_addr: GuestAddress,
sector: u64,
data_addr: GuestAddress,
discard_segments: Option<Vec<DiscardSegment>>, // New field, holds ranges
}

impl Request {
Expand All @@ -258,10 +278,11 @@ impl Request {
data_addr: GuestAddress(0),
data_len: 0,
status_addr: GuestAddress(0),
discard_segments: None
};

let data_desc;
let status_desc;
let mut status_desc;
let desc = avail_desc
.next_descriptor()
.ok_or(VirtioBlockError::DescriptorChainTooShort)?;
Expand Down Expand Up @@ -313,6 +334,52 @@ impl Request {
return Err(VirtioBlockError::InvalidDataLength);
}
}
RequestType::Discard => {
// Get data descriptor
let data_desc = avail_desc.next_descriptor()
.ok_or(VirtioBlockError::DescriptorChainTooShort)?;
if data_desc.is_write_only() {
return Err(VirtioBlockError::UnexpectedWriteOnlyDescriptor);
}

// Validate data length
let segment_size = std::mem::size_of::<DiscardSegment>() as u32; // 16 bytes
if data_desc.len % segment_size != 0 {
return Err(VirtioBlockError::InvalidDataLength);
}

// Calculate number of segments
let num_segments = data_desc.len / segment_size;
if num_segments == 0 {
return Err(VirtioBlockError::InvalidDiscardRequest);
}
let mut segments = Vec::with_capacity(num_segments as usize);

// Populate DiscardSegments vector
for i in 0..num_segments {
let offset = i * segment_size;
let segment: DiscardSegment = mem.read_obj(data_desc.addr.unchecked_add(offset as u64))
.map_err(VirtioBlockError::GuestMemory)?;
if segment.flags & !0x1 != 0 {
return Err(VirtioBlockError::InvalidDiscardFlags);
}
let end_sector = segment.sector.checked_add(segment.num_sectors as u64)
.ok_or(VirtioBlockError::SectorOverflow)?;
if end_sector > num_disk_sectors {
return Err(VirtioBlockError::BeyondDiskSize);
}
segments.push(segment);
}

// Assign to req.discard_segments
req.discard_segments = Some(segments);
req.data_len = data_desc.len;
status_desc = data_desc.next_descriptor()
.ok_or(VirtioBlockError::DescriptorChainTooShort)?;
if !status_desc.is_write_only() {
return Err(VirtioBlockError::UnexpectedReadOnlyDescriptor);
}
}
_ => {}
}

Expand Down Expand Up @@ -390,6 +457,22 @@ impl Request {
.map_err(IoErr::GetId);
return ProcessingResult::Executed(pending.finish(mem, res, block_metrics));
}
RequestType::Discard => {
let res = disk
.file_engine
.handle_discard(self.offset(), self.data_len);

match res {
Ok(()) => Ok(block_io::FileEngineOk::Executed(block_io::RequestOk {
req: pending,
count: 0,
})),
Err(e) => Err(block_io::RequestError {
req: pending,
error: BlockIoError::Sync(SyncIoError::Discard(e)),
}),
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice hallucinations. The file_engine.handle_discard should return same result type as file_engine.read/file_engine.write and so on. And then it will be handled by the code bellow.

Copy link
Contributor Author

@LDagnachew LDagnachew Apr 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, that checks out, I'll make that change.

}
RequestType::Unsupported(_) => {
return ProcessingResult::Executed(pending.finish(mem, Ok(0), block_metrics));
}
Expand Down Expand Up @@ -730,6 +813,7 @@ mod tests {
RequestType::Out => VIRTIO_BLK_T_OUT,
RequestType::Flush => VIRTIO_BLK_T_FLUSH,
RequestType::GetDeviceID => VIRTIO_BLK_T_GET_ID,
RequestType::Discard => VIRTIO_BLK_T_DISCARD,
RequestType::Unsupported(id) => id,
}
}
Expand All @@ -742,6 +826,7 @@ mod tests {
RequestType::Out => VIRTQ_DESC_F_NEXT,
RequestType::Flush => VIRTQ_DESC_F_NEXT,
RequestType::GetDeviceID => VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE,
RequestType::Discard => VIRTQ_DESC_F_NEXT,
RequestType::Unsupported(_) => VIRTQ_DESC_F_NEXT,
}
}
Expand Down Expand Up @@ -833,6 +918,7 @@ mod tests {
status_addr,
sector: sector & (NUM_DISK_SECTORS - sectors_len),
data_addr,
discard_segments: None
};
let mut request_header = RequestHeader::new(virtio_request_id, request.sector);

Expand Down