|
| 1 | +// Copyright (C) 2025 Red Hat. All rights reserved. |
| 2 | +// |
| 3 | +// Use of this source code is governed by a BSD-style license that can be |
| 4 | +// found in the LICENSE-BSD-3-Clause file. |
| 5 | +// |
| 6 | +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause |
| 7 | + |
| 8 | +//! Provide an interface for IOMMUs enabling I/O virtual address (IOVA) translation. |
| 9 | +//! |
| 10 | +//! All IOMMUs consist of an IOTLB ([`Iotlb`]), which is backed by a data source that can deliver |
| 11 | +//! all mappings. For example, for vhost-user, that data source is the vhost-user front-end; i.e. |
| 12 | +//! IOTLB misses require sending a notification to the front-end and awaiting a reply that supplies |
| 13 | +//! the desired mapping. |
| 14 | +
|
| 15 | +use crate::{GuestAddress, Permissions}; |
| 16 | +use rangemap::RangeMap; |
| 17 | +use std::cmp; |
| 18 | +use std::fmt::Debug; |
| 19 | +use std::num::Wrapping; |
| 20 | +use std::ops::{Deref, Range}; |
| 21 | + |
| 22 | +/// Errors associated with IOMMU address translation. |
| 23 | +#[derive(Debug, thiserror::Error)] |
| 24 | +pub enum Error { |
| 25 | + /// Lookup cannot be resolved. |
| 26 | + #[error( |
| 27 | + "Cannot translate I/O virtual address range {:#x}+{}: {reason}", |
| 28 | + iova_range.base.0, |
| 29 | + iova_range.length, |
| 30 | + )] |
| 31 | + CannotResolve { |
| 32 | + /// IOVA range that could not be resolved |
| 33 | + iova_range: IovaRange, |
| 34 | + /// Some human-readable specifics about the reason |
| 35 | + reason: String, |
| 36 | + }, |
| 37 | + |
| 38 | + /// Wanted to translate an IOVA range into a single slice, but the range is fragmented. |
| 39 | + #[error( |
| 40 | + "Expected {:#x}+{} to be a continuous I/O virtual address range, but only {continuous_length} bytes are", |
| 41 | + iova_range.base.0, |
| 42 | + iova_range.length, |
| 43 | + )] |
| 44 | + Fragmented { |
| 45 | + /// Full IOVA range that was to be translated |
| 46 | + iova_range: IovaRange, |
| 47 | + /// Length of the continuous head (i.e. the first fragment) |
| 48 | + continuous_length: usize, |
| 49 | + }, |
| 50 | + |
| 51 | + /// IOMMU is not configured correctly, and so cannot translate addresses. |
| 52 | + #[error("IOMMU not configured correctly, cannot operate: {reason}")] |
| 53 | + IommuMisconfigured { |
| 54 | + /// Some human-readable specifics about the misconfiguration |
| 55 | + reason: String, |
| 56 | + }, |
| 57 | +} |
| 58 | + |
| 59 | +/// An IOMMU, allowing translation of I/O virtual addresses (IOVAs). |
| 60 | +/// |
| 61 | +/// Generally, `Iommu` implementaions consist of an [`Iotlb`], which is supposed to be consulted |
| 62 | +/// first for lookup requests. All misses and access failures then should be resolved by looking |
| 63 | +/// up the affected ranges in the actual IOMMU (which has all current mappings) and putting the |
| 64 | +/// results back into the IOTLB. A subsequent lookup in the IOTLB should result in a full |
| 65 | +/// translation, which can then be returned. |
| 66 | +pub trait Iommu: Debug + Send + Sync { |
| 67 | + /// `Deref` type associated with the type that internally wraps the `Iotlb`. |
| 68 | + /// |
| 69 | + /// For example, the `Iommu` may keep the `Iotlb` wrapped in an `RwLock`, making this type |
| 70 | + /// `RwLockReadGuard<'a, Iotlb>`. |
| 71 | + /// |
| 72 | + /// We need this specific type instead of a plain reference so that [`IotlbIterator`] can |
| 73 | + /// actually own the reference and prolong its lifetime. |
| 74 | + type IotlbGuard<'a>: Deref<Target = Iotlb> + 'a |
| 75 | + where |
| 76 | + Self: 'a; |
| 77 | + |
| 78 | + /// Translate the given range for the given access into the underlying address space. |
| 79 | + /// |
| 80 | + /// Any translation request is supposed to be fully served by an internal [`Iotlb`] instance. |
| 81 | + /// Any misses or access failures should result in a lookup in the full IOMMU structures, |
| 82 | + /// filling the IOTLB with the results, and then repeating the lookup in there. |
| 83 | + fn translate( |
| 84 | + &self, |
| 85 | + iova: GuestAddress, |
| 86 | + length: usize, |
| 87 | + access: Permissions, |
| 88 | + ) -> Result<IotlbIterator<Self::IotlbGuard<'_>>, Error>; |
| 89 | +} |
| 90 | + |
| 91 | +/// Mapping target in an IOMMU/IOTLB. |
| 92 | +/// |
| 93 | +/// This is the data to which each entry in an IOMMU/IOTLB maps. |
| 94 | +#[derive(Clone, Copy, Debug, Eq, PartialEq)] |
| 95 | +struct IommuMapping { |
| 96 | + /// Difference between the mapped and the IOVA address, i.e. what to add to an IOVA address to |
| 97 | + /// get the mapped adrress. |
| 98 | + /// |
| 99 | + /// We cannot store the more obvious mapped base address for this range because that would |
| 100 | + /// allow rangemap to wrongfully merge consecutive map entries if they are a duplicate mapping |
| 101 | + /// (which does happen). Storing the difference ensures that entries are only merged when they |
| 102 | + /// are indeed consecutive. |
| 103 | + /// |
| 104 | + /// Note that we make no granularity restrictions (i.e. do not operate on a unit like pages), |
| 105 | + /// so the source and target address may have arbitrary alignment. That is why both fields |
| 106 | + /// here need to be separate and we cannot merge the two bits that are `permissions` with this |
| 107 | + /// base address into a single `u64` field. |
| 108 | + target_source_diff: Wrapping<u64>, |
| 109 | + /// Allowed access for the mapped range |
| 110 | + permissions: Permissions, |
| 111 | +} |
| 112 | + |
| 113 | +/// Provides an IOTLB. |
| 114 | +/// |
| 115 | +/// The IOTLB caches IOMMU mappings. It must be preemptively updated whenever mappings are |
| 116 | +/// restricted or removed; in contrast, adding mappings or making them more permissive does not |
| 117 | +/// require preemptive updates, as subsequent accesses that violate the previous (more restrictive) |
| 118 | +/// permissions will trigger TLB misses or access failures, which is then supposed to result in an |
| 119 | +/// update from the outer [`Iommu`] object that performs the translation. |
| 120 | +#[derive(Debug, Default)] |
| 121 | +pub struct Iotlb { |
| 122 | + /// Mappings of which we know. |
| 123 | + /// |
| 124 | + /// Note that the vhost(-user) specification makes no mention of a specific page size, even |
| 125 | + /// though in practice the IOVA address space will be organized in terms of pages. However, we |
| 126 | + /// cannot really rely on that (or any specific page size; it could be 4k, the guest page size, |
| 127 | + /// or the host page size), so we need to be able to handle continuous ranges of any |
| 128 | + /// granularity. |
| 129 | + tlb: RangeMap<u64, IommuMapping>, |
| 130 | +} |
| 131 | + |
| 132 | +/// Iterates over a range of valid IOTLB mappings that together constitute a continuous range in |
| 133 | +/// I/O virtual address space. |
| 134 | +/// |
| 135 | +/// Returned by [`Iotlb::lookup()`] and [`Iommu::translate()`] in case translation was successful |
| 136 | +/// (i.e. the whole requested range is mapped and permits the given access). |
| 137 | +#[derive(Clone, Debug)] |
| 138 | +pub struct IotlbIterator<D: Deref<Target = Iotlb>> { |
| 139 | + /// IOTLB that provides these mapings |
| 140 | + iotlb: D, |
| 141 | + /// I/O virtual address range left to iterate over |
| 142 | + range: Range<u64>, |
| 143 | + /// Requested access permissions |
| 144 | + access: Permissions, |
| 145 | +} |
| 146 | + |
| 147 | +/// Representation of an IOVA memory range (i.e. in the I/O virtual address space). |
| 148 | +#[derive(Clone, Debug)] |
| 149 | +pub struct IovaRange { |
| 150 | + /// IOVA base address |
| 151 | + pub base: GuestAddress, |
| 152 | + /// Length (in bytes) of this range |
| 153 | + pub length: usize, |
| 154 | +} |
| 155 | + |
| 156 | +/// Representation of a mapped memory range in the underlying address space. |
| 157 | +#[derive(Clone, Debug)] |
| 158 | +pub struct MappedRange { |
| 159 | + /// Base address in the underlying address space |
| 160 | + pub base: GuestAddress, |
| 161 | + /// Length (in bytes) of this mapping |
| 162 | + pub length: usize, |
| 163 | +} |
| 164 | + |
| 165 | +/// Lists the subranges in I/O virtual address space that turned out to not be accessible when |
| 166 | +/// trying to access an IOVA range. |
| 167 | +#[derive(Clone, Debug)] |
| 168 | +pub struct IotlbFails { |
| 169 | + /// Subranges not mapped at all |
| 170 | + pub misses: Vec<IovaRange>, |
| 171 | + /// Subranges that are mapped, but do not allow the requested access mode |
| 172 | + pub access_fails: Vec<IovaRange>, |
| 173 | +} |
| 174 | + |
| 175 | +impl IommuMapping { |
| 176 | + /// Create a new mapping. |
| 177 | + fn new(source_base: u64, target_base: u64, permissions: Permissions) -> Self { |
| 178 | + IommuMapping { |
| 179 | + target_source_diff: Wrapping(target_base) - Wrapping(source_base), |
| 180 | + permissions, |
| 181 | + } |
| 182 | + } |
| 183 | + |
| 184 | + /// Map the given source address (IOVA) to its corresponding target address. |
| 185 | + fn map(&self, iova: u64) -> u64 { |
| 186 | + (Wrapping(iova) + self.target_source_diff).0 |
| 187 | + } |
| 188 | + |
| 189 | + /// Return the permissions for this mapping. |
| 190 | + fn permissions(&self) -> Permissions { |
| 191 | + self.permissions |
| 192 | + } |
| 193 | +} |
| 194 | + |
| 195 | +impl Iotlb { |
| 196 | + /// Create a new empty instance. |
| 197 | + pub fn new() -> Self { |
| 198 | + Default::default() |
| 199 | + } |
| 200 | + |
| 201 | + /// Change the mapping of the given IOVA range. |
| 202 | + pub fn set_mapping( |
| 203 | + &mut self, |
| 204 | + iova: GuestAddress, |
| 205 | + map_to: GuestAddress, |
| 206 | + length: usize, |
| 207 | + perm: Permissions, |
| 208 | + ) -> Result<(), Error> { |
| 209 | + // Soft TODO: We may want to evict old entries here once the TLB grows to a certain size, |
| 210 | + // but that will require LRU book-keeping. However, this is left for the future, because: |
| 211 | + // - this TLB is not implemented in hardware, so we do not really have strong entry count |
| 212 | + // constraints, and |
| 213 | + // - it seems like at least Linux guests invalidate mappings often, automatically limiting |
| 214 | + // our entry count. |
| 215 | + |
| 216 | + let mapping = IommuMapping::new(iova.0, map_to.0, perm); |
| 217 | + self.tlb.insert(iova.0..(iova.0 + length as u64), mapping); |
| 218 | + |
| 219 | + Ok(()) |
| 220 | + } |
| 221 | + |
| 222 | + /// Remove any mapping in the given IOVA range. |
| 223 | + pub fn invalidate_mapping(&mut self, iova: GuestAddress, length: usize) { |
| 224 | + self.tlb.remove(iova.0..(iova.0 + length as u64)); |
| 225 | + } |
| 226 | + |
| 227 | + /// Remove all mappings. |
| 228 | + pub fn invalidate_all(&mut self) { |
| 229 | + self.tlb.clear(); |
| 230 | + } |
| 231 | + |
| 232 | + /// Perform a lookup for the given range and the given `access` mode. |
| 233 | + /// |
| 234 | + /// If the whole range is mapped and accessible, return an iterator over all mappings. |
| 235 | + /// |
| 236 | + /// If any part of the range is not mapped or does not permit the given access mode, return an |
| 237 | + /// `Err(_)` that contains a list of all such subranges. |
| 238 | + pub fn lookup<D: Deref<Target = Iotlb>>( |
| 239 | + this: D, |
| 240 | + iova: GuestAddress, |
| 241 | + length: usize, |
| 242 | + access: Permissions, |
| 243 | + ) -> Result<IotlbIterator<D>, IotlbFails> { |
| 244 | + let full_range = iova.0..(iova.0 + length as u64); |
| 245 | + |
| 246 | + let has_misses = this.tlb.gaps(&full_range).any(|_| true); |
| 247 | + let has_access_fails = this |
| 248 | + .tlb |
| 249 | + .overlapping(full_range.clone()) |
| 250 | + .any(|(_, mapping)| !mapping.permissions().allow(access)); |
| 251 | + |
| 252 | + if has_misses || has_access_fails { |
| 253 | + let misses = this |
| 254 | + .tlb |
| 255 | + .gaps(&full_range) |
| 256 | + .map(|range| { |
| 257 | + // Gaps are always cut down to the range given to `gaps()` |
| 258 | + debug_assert!(range.start >= full_range.start && range.end <= full_range.end); |
| 259 | + range.try_into().unwrap() |
| 260 | + }) |
| 261 | + .collect::<Vec<_>>(); |
| 262 | + |
| 263 | + let access_fails = this |
| 264 | + .tlb |
| 265 | + .overlapping(full_range.clone()) |
| 266 | + .filter(|(_, mapping)| !mapping.permissions().allow(access)) |
| 267 | + .map(|(range, _)| { |
| 268 | + let start = cmp::max(range.start, full_range.start); |
| 269 | + let end = cmp::min(range.end, full_range.end); |
| 270 | + (start..end).try_into().unwrap() |
| 271 | + }) |
| 272 | + .collect::<Vec<_>>(); |
| 273 | + |
| 274 | + return Err(IotlbFails { |
| 275 | + misses, |
| 276 | + access_fails, |
| 277 | + }); |
| 278 | + } |
| 279 | + |
| 280 | + Ok(IotlbIterator { |
| 281 | + iotlb: this, |
| 282 | + range: full_range, |
| 283 | + access, |
| 284 | + }) |
| 285 | + } |
| 286 | +} |
| 287 | + |
| 288 | +impl<D: Deref<Target = Iotlb>> Iterator for IotlbIterator<D> { |
| 289 | + /// Addresses in the underlying address space |
| 290 | + type Item = MappedRange; |
| 291 | + |
| 292 | + fn next(&mut self) -> Option<Self::Item> { |
| 293 | + // Note that we can expect the whole IOVA range to be mapped with the right access flags. |
| 294 | + // The `IotlbIterator` is created by `Iotlb::lookup()` only if the whole range is mapped |
| 295 | + // accessibly; we have a permanent reference to `Iotlb`, so the range cannot be invalidated |
| 296 | + // in the meantime. |
| 297 | + // Another note: It is tempting to have `IotlbIterator` wrap around the |
| 298 | + // `rangemap::Overlapping` iterator, but that just takes a (lifetimed) reference to the |
| 299 | + // map, not an owned reference (like RwLockReadGuard), which we want to use; so using that |
| 300 | + // would probably require self-referential structs. |
| 301 | + |
| 302 | + if self.range.is_empty() { |
| 303 | + return None; |
| 304 | + } |
| 305 | + |
| 306 | + let (range, mapping) = self.iotlb.tlb.get_key_value(&self.range.start).unwrap(); |
| 307 | + |
| 308 | + assert!(mapping.permissions().allow(self.access)); |
| 309 | + |
| 310 | + let mapping_iova_start = self.range.start; |
| 311 | + let mapping_iova_end = cmp::min(self.range.end, range.end); |
| 312 | + let mapping_len = mapping_iova_end - mapping_iova_start; |
| 313 | + |
| 314 | + self.range.start = mapping_iova_end; |
| 315 | + |
| 316 | + Some(MappedRange { |
| 317 | + base: GuestAddress(mapping.map(mapping_iova_start)), |
| 318 | + length: mapping_len.try_into().unwrap(), |
| 319 | + }) |
| 320 | + } |
| 321 | +} |
| 322 | + |
| 323 | +impl TryFrom<Range<u64>> for IovaRange { |
| 324 | + type Error = <u64 as TryFrom<usize>>::Error; |
| 325 | + |
| 326 | + fn try_from(range: Range<u64>) -> Result<Self, Self::Error> { |
| 327 | + Ok(IovaRange { |
| 328 | + base: GuestAddress(range.start), |
| 329 | + length: (range.end - range.start).try_into()?, |
| 330 | + }) |
| 331 | + } |
| 332 | +} |
0 commit comments