Skip to content

Commit 19fe4e7

Browse files
author
Pat Hickey
authored
Merge pull request #56 from bchalios/kernel-6.1
Use `/dev/userfaultfd`, when it exists, to create the file descriptor
2 parents 5939721 + 89ff112 commit 19fe4e7

File tree

6 files changed

+90
-13
lines changed

6 files changed

+90
-13
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
- Added `Uffd::read_events` that can read multiple events from the userfaultfd file descriptor.
44
- Updated `bitflags` dependency to `2.2.1`.
5+
- Use `/dev/userfaultfd` as the default API for creating userfaultfd file descriptors.
6+
7+
Since Linux 5.11, a process can select whether it wants to handle page faults triggered in kernel space
8+
or not. Under this mechanism, processes that wish to handle those need to have the `CAP_SYS_PTRACE`
9+
capability. `CAP_SYS_PTRACE` allows a process to do much more than create userfault fds, so with
10+
6.1 Linux introduces `/dev/userfaultfd`, a special character device that allows creating
11+
userfault file descriptors using the `USERFAULTFD_IOC_NEW` `ioctl`. Access to this device is
12+
granted via file system permissions and does not require `CAP_SYS_PTRACE` to handle kernel
13+
triggered page faults.
14+
15+
We now default to using `/dev/userfaultfd` for creating the descriptors and only if that file is
16+
not present, we fall back to using the syscall.
517

618
### 0.3.1 (2021-02-17)
719

src/builder.rs

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ use crate::raw;
33
use crate::{IoctlFlags, Uffd};
44
use bitflags::bitflags;
55
use nix::errno::Errno;
6+
use std::fs::{File, OpenOptions};
7+
use std::io::ErrorKind;
8+
use std::os::fd::AsRawFd;
9+
10+
const UFFD_DEVICE_PATH: &str = "/dev/userfaultfd";
611

712
cfg_if::cfg_if! {
813
if #[cfg(any(feature = "linux5_7", feature = "linux4_14"))] {
@@ -115,6 +120,47 @@ impl UffdBuilder {
115120
self
116121
}
117122

123+
// Create a userfault file descriptor by issuing the `raw::new_uffd` ioctl on an already
// opened `/dev/userfaultfd` device `file`, passing `flags` as the ioctl argument.
// On success the returned descriptor is wrapped in a `Uffd`; an ioctl error is converted
// into this crate's error type and returned.
fn uffd_from_dev(&self, file: &mut File, flags: i32) -> Result<Uffd> {
124+
// SAFETY: `file` is an open `File` borrowed for the duration of the call, so the raw
// descriptor handed to the ioctl is valid while the ioctl runs.
match unsafe { raw::new_uffd(file.as_raw_fd(), flags) } {
125+
Err(err) => Err(err.into()),
126+
Ok(fd) => Ok(Uffd { fd }),
127+
}
128+
}
129+
130+
// Create a userfault file descriptor through `raw::userfaultfd(flags)`, using
// `Errno::result` to turn a failing return value into an error.
// When the builder requested `user_mode_only` and the first attempt fails with EINVAL,
// the call is retried once with the `UFFD_USER_MODE_ONLY` bit cleared from `flags`.
fn uffd_from_syscall(&self, flags: i32) -> Result<Uffd> {
131+
let fd = match Errno::result(unsafe { raw::userfaultfd(flags) }) {
132+
Ok(fd) => fd,
133+
// Setting the USER_MODE_ONLY flag on kernels before 5.11 causes the call to return EINVAL.
134+
// If the user asks for the flag, we first try with it set, and if the kernel gives
135+
// EINVAL we try again without the flag set.
136+
Err(Errno::EINVAL) if self.user_mode_only => Errno::result(unsafe {
137+
raw::userfaultfd(flags & !raw::UFFD_USER_MODE_ONLY as i32)
138+
})?,
139+
Err(e) => return Err(e.into()),
140+
};
141+
142+
// Wrap the raw fd in a `Uffd` so that ownership passes to the wrapper from here on.
143+
Ok(Uffd { fd })
144+
}
145+
146+
// Get a UFFD file descriptor, preferring `/dev/userfaultfd`. Only when the device does
147+
// not exist do we fall back to calling the system call.
148+
fn open_file_descriptor(&self, flags: i32) -> Result<Uffd> {
149+
// If `/dev/userfaultfd` exists we'll try to get the file descriptor from it. If the file
150+
// doesn't exist we will fall back to calling the system call. This means that if the
151+
// device exists but the calling process does not have access rights to it, this will fail
152+
// with `Error::OpenDevUserfaultfd`, i.e. we will not fall back to calling the system call.
153+
match OpenOptions::new()
154+
.read(true)
155+
.write(true)
156+
.open(UFFD_DEVICE_PATH)
157+
{
158+
// The device was opened: an error from the ioctl is returned as-is, without a fallback.
Ok(mut file) => self.uffd_from_dev(&mut file, flags),
159+
Err(err) if err.kind() == ErrorKind::NotFound => self.uffd_from_syscall(flags),
160+
Err(err) => Err(Error::OpenDevUserfaultfd(err)),
161+
}
162+
}
163+
118164
/// Create a `Uffd` object with the current settings of this builder.
119165
pub fn create(&self) -> Result<Uffd> {
120166
// first obtain the file descriptor, from `/dev/userfaultfd` or, if that does not exist, the syscall
@@ -130,19 +176,7 @@ impl UffdBuilder {
130176
flags |= raw::UFFD_USER_MODE_ONLY as i32;
131177
}
132178

133-
let fd = match Errno::result(unsafe { raw::userfaultfd(flags) }) {
134-
Ok(fd) => fd,
135-
// setting the USER_MODE_ONLY flag on kernel pre-5.11 causes it to return EINVAL.
136-
// If the user asks for the flag, we first try with it set, and if kernel gives
137-
// EINVAL we try again without the flag set.
138-
Err(Errno::EINVAL) if self.user_mode_only => Errno::result(unsafe {
139-
raw::userfaultfd(flags & !raw::UFFD_USER_MODE_ONLY as i32)
140-
})?,
141-
Err(e) => return Err(e.into()),
142-
};
143-
144-
// Wrap the fd up so that a failure in this function body closes it with the drop.
145-
let uffd = Uffd { fd };
179+
let uffd = self.open_file_descriptor(flags)?;
146180

147181
// then do the UFFDIO_API ioctl to set up and ensure features and other ioctls are available
148182
let mut api = raw::uffdio_api {

src/error.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::io;
2+
13
use crate::IoctlFlags;
24
use nix::errno::Errno;
35
use thiserror::Error;
@@ -47,6 +49,10 @@ pub enum Error {
4749
/// Zeropage ioctl failure with `errno` value.
4850
#[error("Zeropage failed: {0}")]
4951
ZeropageFailed(Errno),
52+
53+
/// Could not open /dev/userfaultfd even though it exists
54+
#[error("Error accessing /dev/userfaultfd: {0}")]
55+
OpenDevUserfaultfd(io::Error),
5056
}
5157

5258
impl From<nix::Error> for Error {

src/raw.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,18 @@ nix::ioctl_readwrite!(
2323
_UFFDIO_WRITEPROTECT,
2424
uffdio_writeprotect
2525
);
26+
27+
// ioctls for /dev/userfaultfd
28+
29+
// This is the `/dev/userfaultfd` ioctl() for creating a new userfault file descriptor.
30+
// It is a "bad" ioctl in the sense that it is defined with `_IO`:
31+
// https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/userfaultfd.h#L17,
32+
// aka `nix::ioctl_none`, however it does receive an integer argument:
33+
// https://elixir.bootlin.com/linux/latest/source/fs/userfaultfd.c#L2186. That is the same argument
34+
// that the userfaultfd() system call receives. Hence the request code is built with
// `request_code_none!` while the wrapper is generated with `ioctl_write_int_bad!`.
35+
nix::ioctl_write_int_bad!(
36+
/// Create a new userfault file descriptor from the `/dev/userfaultfd`
37+
/// device. This receives the same arguments as the userfaultfd system call.
38+
new_uffd,
39+
nix::request_code_none!(USERFAULTFD_IOC, 0x00)
40+
);

userfaultfd-sys/src/consts.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,7 @@ const __u32 _const_UFFDIO_ZEROPAGE = UFFDIO_ZEROPAGE;
6969
#ifdef UFFDIO_WRITEPROTECT
7070
const __u32 _const_UFFDIO_WRITEPROTECT = UFFDIO_WRITEPROTECT;
7171
#endif
72+
73+
/* Export the USERFAULTFD_IOC macro (the ioctl type used for `/dev/userfaultfd`) as a
 * named constant, in the same way as the other `_const_*` definitions in this file. */
#ifdef USERFAULTFD_IOC
74+
const __u32 _const_USERFAULTFD_IOC = USERFAULTFD_IOC;
75+
#endif

userfaultfd-sys/wrapper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@
44
// userfaultfd-sys has the same exports on all kernels
55
#define UFFD_USER_MODE_ONLY 1
66
#endif
7+
8+
9+
#ifndef USERFAULTFD_IOC
10+
// Similarly, the ioctl() for `/dev/userfaultfd` was introduced with Linux 6.1. Define its
// ioctl type here when the system headers do not provide it, so that userfaultfd-sys has
// the same exports on all kernels.
11+
#define USERFAULTFD_IOC 0xAA
12+
#endif

0 commit comments

Comments
 (0)