Commit d112c2d

Add Iommu trait and Iotlb struct
The Iommu trait defines an interface for translating virtual addresses into addresses in an underlying address space. It is supposed to do so by internally keeping an instance of the Iotlb type, updating it with mappings whenever necessary (e.g. when actively invalidated or when there's an access failure) from some internal data source (e.g. for a vhost-user IOMMU, the data comes from the vhost-user front-end by requesting an update).

In a later commit, we are going to provide an implementation of `IoMemory` that can use an `Iommu` to provide an I/O virtual address space.

Note that while I/O virtual memory in practice will be organized in pages, the vhost-user specification makes no mention of a specific page size or how to obtain it. Therefore, we cannot really assume any page size and have to use plain ranges with byte granularity as mappings instead.

Signed-off-by: Hanna Czenczek <[email protected]>
1 parent e652da4 commit d112c2d
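As a quick orientation before the per-file diffs, here is a minimal, hypothetical sketch of how the new Iotlb type added in src/iommu.rs (below) could be exercised. It is not part of this commit; it assumes the new module is exported as vm_memory::iommu, that the `iommu` feature is enabled, and that `Permissions` exposes `ReadWrite` and `Read` variants.

use vm_memory::iommu::Iotlb;
use vm_memory::{GuestAddress, Permissions};

fn iotlb_demo() -> Result<(), Box<dyn std::error::Error>> {
    let mut iotlb = Iotlb::new();

    // Map the IOVA range [0x1000, 0x3000) to [0x8000_0000, 0x8000_2000) in the
    // underlying address space, readable and writable.
    // (`Permissions::ReadWrite`/`Read` variant names are assumptions.)
    iotlb.set_mapping(
        GuestAddress(0x1000),
        GuestAddress(0x8000_0000),
        0x2000,
        Permissions::ReadWrite,
    )?;

    // Look up a sub-range for reading; on success this yields an iterator of
    // `MappedRange`s that together cover the requested IOVA range.
    let mappings = Iotlb::lookup(&iotlb, GuestAddress(0x1800), 0x800, Permissions::Read)
        .map_err(|fails| format!("IOTLB lookup failed: {fails:?}"))?;
    for mapping in mappings {
        println!("-> {:#x}+{}", mapping.base.0, mapping.length);
    }

    Ok(())
}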

5 files changed: +349, -3 lines


Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -16,13 +16,15 @@ default = ["rawfd"]
 backend-bitmap = []
 backend-mmap = ["dep:libc"]
 backend-atomic = ["arc-swap"]
+iommu = ["dep:rangemap"]
 rawfd = ["dep:libc"]
 xen = ["backend-mmap", "bitflags", "vmm-sys-util"]
 
 [dependencies]
 libc = { version = "0.2.39", optional = true }
 arc-swap = { version = "1.0.0", optional = true }
 bitflags = { version = "2.4.0", optional = true }
+rangemap = { version = "1.5.1", optional = true }
 thiserror = "1.0.40"
 vmm-sys-util = { version = "0.12.1", optional = true }

src/atomic.rs

Lines changed: 4 additions & 3 deletions
@@ -143,7 +143,7 @@ impl<M: IoMemory> GuestMemoryExclusiveGuard<'_, M> {
 #[cfg(feature = "backend-mmap")]
 mod tests {
     use super::*;
-    use crate::{GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, MmapRegion};
+    use crate::{GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, IoMemory, MmapRegion};
 
     type GuestMemoryMmap = crate::GuestMemoryMmap<()>;
     type GuestRegionMmap = crate::GuestRegionMmap<()>;
@@ -159,7 +159,8 @@ mod tests {
         let mut iterated_regions = Vec::new();
         let gmm = GuestMemoryMmap::from_ranges(&regions).unwrap();
         let gm = GuestMemoryMmapAtomic::new(gmm);
-        let mem = gm.memory();
+        let vmem = gm.memory();
+        let mem = vmem.physical_memory().unwrap();
 
         for region in mem.iter() {
             assert_eq!(region.len(), region_size as GuestUsize);
@@ -178,7 +179,7 @@ mod tests {
            .map(|x| (x.0, x.1))
            .eq(iterated_regions.iter().copied()));
 
-        let mem2 = mem.into_inner();
+        let mem2 = vmem.into_inner();
         for region in mem2.iter() {
             assert_eq!(region.len(), region_size as GuestUsize);
         }

src/guest_memory.rs

Lines changed: 6 additions & 0 deletions
@@ -54,6 +54,8 @@ use crate::address::{Address, AddressValue};
 use crate::bitmap::{Bitmap, BS, MS};
 use crate::bytes::{AtomicAccess, Bytes};
 use crate::io::{ReadVolatile, WriteVolatile};
+#[cfg(feature = "iommu")]
+use crate::iommu::Error as IommuError;
 use crate::volatile_memory::{self, VolatileSlice};
 use crate::{IoMemory, Permissions};
 
@@ -84,6 +86,10 @@ pub enum Error {
     /// The address to be read by `try_access` is outside the address range.
     #[error("The address to be read by `try_access` is outside the address range")]
     GuestAddressOverflow,
+    #[cfg(feature = "iommu")]
+    /// IOMMU translation error
+    #[error("IOMMU failed to translate guest address: {0}")]
+    IommuError(IommuError),
 }
 
 impl From<volatile_memory::Error> for Error {

src/iommu.rs

Lines changed: 332 additions & 0 deletions
@@ -0,0 +1,332 @@
+// Copyright (C) 2025 Red Hat. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+//! Provide an interface for IOMMUs enabling I/O virtual address (IOVA) translation.
+//!
+//! All IOMMUs consist of an IOTLB ([`Iotlb`]), which is backed by a data source that can deliver
+//! all mappings. For example, for vhost-user, that data source is the vhost-user front-end; i.e.
+//! IOTLB misses require sending a notification to the front-end and awaiting a reply that supplies
+//! the desired mapping.
+
+use crate::{GuestAddress, Permissions};
+use rangemap::RangeMap;
+use std::cmp;
+use std::fmt::Debug;
+use std::num::Wrapping;
+use std::ops::{Deref, Range};
+
+/// Errors associated with IOMMU address translation.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    /// Lookup cannot be resolved.
+    #[error(
+        "Cannot translate I/O virtual address range {:#x}+{}: {reason}",
+        iova_range.base.0,
+        iova_range.length,
+    )]
+    CannotResolve {
+        /// IOVA range that could not be resolved
+        iova_range: IovaRange,
+        /// Some human-readable specifics about the reason
+        reason: String,
+    },
+
+    /// Wanted to translate an IOVA range into a single slice, but the range is fragmented.
+    #[error(
+        "Expected {:#x}+{} to be a continuous I/O virtual address range, but only {continuous_length} bytes are",
+        iova_range.base.0,
+        iova_range.length,
+    )]
+    Fragmented {
+        /// Full IOVA range that was to be translated
+        iova_range: IovaRange,
+        /// Length of the continuous head (i.e. the first fragment)
+        continuous_length: usize,
+    },
+
+    /// IOMMU is not configured correctly, and so cannot translate addresses.
+    #[error("IOMMU not configured correctly, cannot operate: {reason}")]
+    IommuMisconfigured {
+        /// Some human-readable specifics about the misconfiguration
+        reason: String,
+    },
+}
+
+/// An IOMMU, allowing translation of I/O virtual addresses (IOVAs).
+///
+/// Generally, `Iommu` implementations consist of an [`Iotlb`], which is supposed to be consulted
+/// first for lookup requests. All misses and access failures then should be resolved by looking
+/// up the affected ranges in the actual IOMMU (which has all current mappings) and putting the
+/// results back into the IOTLB. A subsequent lookup in the IOTLB should result in a full
+/// translation, which can then be returned.
+pub trait Iommu: Debug + Send + Sync {
+    /// `Deref` type associated with the type that internally wraps the `Iotlb`.
+    ///
+    /// For example, the `Iommu` may keep the `Iotlb` wrapped in an `RwLock`, making this type
+    /// `RwLockReadGuard<'a, Iotlb>`.
+    ///
+    /// We need this specific type instead of a plain reference so that [`IotlbIterator`] can
+    /// actually own the reference and prolong its lifetime.
+    type IotlbGuard<'a>: Deref<Target = Iotlb> + 'a
+    where
+        Self: 'a;
+
+    /// Translate the given range for the given access into the underlying address space.
+    ///
+    /// Any translation request is supposed to be fully served by an internal [`Iotlb`] instance.
+    /// Any misses or access failures should result in a lookup in the full IOMMU structures,
+    /// filling the IOTLB with the results, and then repeating the lookup in there.
+    fn translate(
+        &self,
+        iova: GuestAddress,
+        length: usize,
+        access: Permissions,
+    ) -> Result<IotlbIterator<Self::IotlbGuard<'_>>, Error>;
+}
+
+/// Mapping target in an IOMMU/IOTLB.
+///
+/// This is the data to which each entry in an IOMMU/IOTLB maps.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+struct IommuMapping {
+    /// Difference between the mapped and the IOVA address, i.e. what to add to an IOVA address to
+    /// get the mapped address.
+    ///
+    /// We cannot store the more obvious mapped base address for this range because that would
+    /// allow rangemap to wrongfully merge consecutive map entries if they are a duplicate mapping
+    /// (which does happen). Storing the difference ensures that entries are only merged when they
+    /// are indeed consecutive.
+    ///
+    /// Note that we make no granularity restrictions (i.e. do not operate on a unit like pages),
+    /// so the source and target address may have arbitrary alignment. That is why both fields
+    /// here need to be separate and we cannot merge the two bits that are `permissions` with this
+    /// base address into a single `u64` field.
+    target_source_diff: Wrapping<u64>,
+    /// Allowed access for the mapped range
+    permissions: Permissions,
+}
+
+/// Provides an IOTLB.
+///
+/// The IOTLB caches IOMMU mappings. It must be preemptively updated whenever mappings are
+/// restricted or removed; in contrast, adding mappings or making them more permissive does not
+/// require preemptive updates, as subsequent accesses that violate the previous (more restrictive)
+/// permissions will trigger TLB misses or access failures, which are then supposed to result in an
+/// update from the outer [`Iommu`] object that performs the translation.
+#[derive(Debug, Default)]
+pub struct Iotlb {
+    /// Mappings of which we know.
+    ///
+    /// Note that the vhost(-user) specification makes no mention of a specific page size, even
+    /// though in practice the IOVA address space will be organized in terms of pages. However, we
+    /// cannot really rely on that (or any specific page size; it could be 4k, the guest page size,
+    /// or the host page size), so we need to be able to handle continuous ranges of any
+    /// granularity.
+    tlb: RangeMap<u64, IommuMapping>,
+}
+
+/// Iterates over a range of valid IOTLB mappings that together constitute a continuous range in
+/// I/O virtual address space.
+///
+/// Returned by [`Iotlb::lookup()`] and [`Iommu::translate()`] in case translation was successful
+/// (i.e. the whole requested range is mapped and permits the given access).
+#[derive(Clone, Debug)]
+pub struct IotlbIterator<D: Deref<Target = Iotlb>> {
+    /// IOTLB that provides these mappings
+    iotlb: D,
+    /// I/O virtual address range left to iterate over
+    range: Range<u64>,
+    /// Requested access permissions
+    access: Permissions,
+}
+
+/// Representation of an IOVA memory range (i.e. in the I/O virtual address space).
+#[derive(Clone, Debug)]
+pub struct IovaRange {
+    /// IOVA base address
+    pub base: GuestAddress,
+    /// Length (in bytes) of this range
+    pub length: usize,
+}
+
+/// Representation of a mapped memory range in the underlying address space.
+#[derive(Clone, Debug)]
+pub struct MappedRange {
+    /// Base address in the underlying address space
+    pub base: GuestAddress,
+    /// Length (in bytes) of this mapping
+    pub length: usize,
+}
+
+/// Lists the subranges in I/O virtual address space that turned out to not be accessible when
+/// trying to access an IOVA range.
+#[derive(Clone, Debug)]
+pub struct IotlbFails {
+    /// Subranges not mapped at all
+    pub misses: Vec<IovaRange>,
+    /// Subranges that are mapped, but do not allow the requested access mode
+    pub access_fails: Vec<IovaRange>,
+}
+
+impl IommuMapping {
+    /// Create a new mapping.
+    fn new(source_base: u64, target_base: u64, permissions: Permissions) -> Self {
+        IommuMapping {
+            target_source_diff: Wrapping(target_base) - Wrapping(source_base),
+            permissions,
+        }
+    }
+
+    /// Map the given source address (IOVA) to its corresponding target address.
+    fn map(&self, iova: u64) -> u64 {
+        (Wrapping(iova) + self.target_source_diff).0
+    }
+
+    /// Return the permissions for this mapping.
+    fn permissions(&self) -> Permissions {
+        self.permissions
+    }
+}
+
+impl Iotlb {
+    /// Create a new empty instance.
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Change the mapping of the given IOVA range.
+    pub fn set_mapping(
+        &mut self,
+        iova: GuestAddress,
+        map_to: GuestAddress,
+        length: usize,
+        perm: Permissions,
+    ) -> Result<(), Error> {
+        // Soft TODO: We may want to evict old entries here once the TLB grows to a certain size,
+        // but that will require LRU book-keeping. However, this is left for the future, because:
+        // - this TLB is not implemented in hardware, so we do not really have strong entry count
+        //   constraints, and
+        // - it seems like at least Linux guests invalidate mappings often, automatically limiting
+        //   our entry count.
+
+        let mapping = IommuMapping::new(iova.0, map_to.0, perm);
+        self.tlb.insert(iova.0..(iova.0 + length as u64), mapping);
+
+        Ok(())
+    }
+
+    /// Remove any mapping in the given IOVA range.
+    pub fn invalidate_mapping(&mut self, iova: GuestAddress, length: usize) {
+        self.tlb.remove(iova.0..(iova.0 + length as u64));
+    }
+
+    /// Remove all mappings.
+    pub fn invalidate_all(&mut self) {
+        self.tlb.clear();
+    }
+
+    /// Perform a lookup for the given range and the given `access` mode.
+    ///
+    /// If the whole range is mapped and accessible, return an iterator over all mappings.
+    ///
+    /// If any part of the range is not mapped or does not permit the given access mode, return an
+    /// `Err(_)` that contains a list of all such subranges.
+    pub fn lookup<D: Deref<Target = Iotlb>>(
+        this: D,
+        iova: GuestAddress,
+        length: usize,
+        access: Permissions,
+    ) -> Result<IotlbIterator<D>, IotlbFails> {
+        let full_range = iova.0..(iova.0 + length as u64);
+
+        let has_misses = this.tlb.gaps(&full_range).any(|_| true);
+        let has_access_fails = this
+            .tlb
+            .overlapping(full_range.clone())
+            .any(|(_, mapping)| !mapping.permissions().allow(access));
+
+        if has_misses || has_access_fails {
+            let misses = this
+                .tlb
+                .gaps(&full_range)
+                .map(|range| {
+                    // Gaps are always cut down to the range given to `gaps()`
+                    debug_assert!(range.start >= full_range.start && range.end <= full_range.end);
+                    range.try_into().unwrap()
+                })
+                .collect::<Vec<_>>();
+
+            let access_fails = this
+                .tlb
+                .overlapping(full_range.clone())
+                .filter(|(_, mapping)| !mapping.permissions().allow(access))
+                .map(|(range, _)| {
+                    let start = cmp::max(range.start, full_range.start);
+                    let end = cmp::min(range.end, full_range.end);
+                    (start..end).try_into().unwrap()
+                })
+                .collect::<Vec<_>>();
+
+            return Err(IotlbFails {
+                misses,
+                access_fails,
+            });
+        }
+
+        Ok(IotlbIterator {
+            iotlb: this,
+            range: full_range,
+            access,
+        })
+    }
+}
+
+impl<D: Deref<Target = Iotlb>> Iterator for IotlbIterator<D> {
+    /// Addresses in the underlying address space
+    type Item = MappedRange;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Note that we can expect the whole IOVA range to be mapped with the right access flags.
+        // The `IotlbIterator` is created by `Iotlb::lookup()` only if the whole range is mapped
+        // accessibly; we have a permanent reference to `Iotlb`, so the range cannot be invalidated
+        // in the meantime.
+        // Another note: It is tempting to have `IotlbIterator` wrap around the
+        // `rangemap::Overlapping` iterator, but that just takes a (lifetimed) reference to the
+        // map, not an owned reference (like RwLockReadGuard), which we want to use; so using that
+        // would probably require self-referential structs.
+
+        if self.range.is_empty() {
+            return None;
+        }
+
+        let (range, mapping) = self.iotlb.tlb.get_key_value(&self.range.start).unwrap();
+
+        assert!(mapping.permissions().allow(self.access));
+
+        let mapping_iova_start = self.range.start;
+        let mapping_iova_end = cmp::min(self.range.end, range.end);
+        let mapping_len = mapping_iova_end - mapping_iova_start;
+
+        self.range.start = mapping_iova_end;
+
+        Some(MappedRange {
+            base: GuestAddress(mapping.map(mapping_iova_start)),
+            length: mapping_len.try_into().unwrap(),
+        })
+    }
+}
+
+impl TryFrom<Range<u64>> for IovaRange {
+    type Error = <u64 as TryFrom<usize>>::Error;
+
+    fn try_from(range: Range<u64>) -> Result<Self, Self::Error> {
+        Ok(IovaRange {
+            base: GuestAddress(range.start),
+            length: (range.end - range.start).try_into()?,
+        })
+    }
+}
