Skip to content

Commit f81ae83

Browse files
committed
install: Automatically set up /dev and /var/lib/containers
We're looking again at the ergonomics of `bootc install to-existing-root`. This uses the "mounting into mount namespaces" from the new mount API to automatically set up `/dev` and `/var/lib/containers` if they weren't provided to `podman run`, which shrinks what's needed a bit.

Closes: #826
Signed-off-by: Colin Walters <[email protected]>
1 parent b9360a9 commit f81ae83

File tree

2 files changed

+148
-11
lines changed

2 files changed

+148
-11
lines changed

lib/src/install.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,8 @@ async fn prepare_install(
12691269
tracing::debug!("Target image reference: {target_imgref}");
12701270

12711271
// A bit of basic global state setup
1272+
crate::mount::ensure_mirrored_host_mount("/dev")?;
1273+
crate::mount::ensure_mirrored_host_mount("/var/lib/containers")?;
12721274
ensure_var()?;
12731275
setup_tmp_mounts()?;
12741276
// Allocate a temporary directory we can use in various places to avoid
@@ -1454,12 +1456,6 @@ async fn install_to_filesystem_impl(state: &State, rootfs: &mut RootSetup) -> Re
14541456
.ok_or_else(|| anyhow!("No uuid for boot/root"))?;
14551457
tracing::debug!("boot uuid={boot_uuid}");
14561458

1457-
// If we're doing an alongside install, then the /dev bootupd sees needs to be the host's.
1458-
ensure!(
1459-
crate::mount::is_same_as_host(Utf8Path::new("/dev"))?,
1460-
"Missing /dev mount to host /dev"
1461-
);
1462-
14631459
let bound_images = BoundImages::from_state(state).await?;
14641460

14651461
// Initialize the ostree sysroot (repo, stateroot, etc.)
@@ -1514,9 +1510,6 @@ pub(crate) async fn install_to_disk(mut opts: InstallToDiskOpts) -> Result<()> {
15141510
block_opts.device
15151511
);
15161512
}
1517-
if !crate::mount::is_same_as_host(Utf8Path::new("/dev"))? {
1518-
anyhow::bail!("Loopback mounts (--via-loopback) require host devices (-v /dev:/dev)");
1519-
}
15201513
} else if !target_blockdev_meta.file_type().is_block_device() {
15211514
anyhow::bail!("Not a block device: {}", block_opts.device);
15221515
}

lib/src/mount.rs

Lines changed: 146 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,24 @@
11
//! Helpers for interacting with mountpoints
22
3-
use std::process::Command;
3+
use std::{
4+
fs,
5+
os::fd::{AsFd, OwnedFd},
6+
process::Command,
7+
};
48

5-
use anyhow::{anyhow, Result};
9+
use anyhow::{anyhow, Context, Result};
610
use bootc_utils::CommandRunExt;
711
use camino::Utf8Path;
812
use fn_error_context::context;
13+
use rustix::{
14+
mount::{MoveMountFlags, OpenTreeFlags},
15+
net::{
16+
AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
17+
SocketFlags, SocketType,
18+
},
19+
process::WaitOptions,
20+
thread::Pid,
21+
};
922
use serde::Deserialize;
1023

1124
use crate::task::Task;
@@ -124,3 +137,134 @@ pub(crate) fn is_same_as_host(path: &Utf8Path) -> Result<bool> {
124137
);
125138
Ok(devstat.f_fsid == hostdevstat.f_fsid)
126139
}
140+
141+
/// Given a pid, enter its mount namespace and acquire a file descriptor
142+
/// for a mount from that namespace.
143+
#[allow(unsafe_code)]
144+
#[context("Opening mount tree from pid")]
145+
pub(crate) fn open_tree_from_pidns(
146+
pid: rustix::process::Pid,
147+
path: &Utf8Path,
148+
recursive: bool,
149+
) -> Result<OwnedFd> {
150+
// Allocate a socket pair to use for sending file descriptors.
151+
let (sock_parent, sock_child) = rustix::net::socketpair(
152+
AddressFamily::UNIX,
153+
SocketType::STREAM,
154+
SocketFlags::CLOEXEC,
155+
None,
156+
)
157+
.context("socketpair")?;
158+
const DUMMY_DATA: &[u8] = &[b'!'];
159+
match unsafe { libc::fork() } {
160+
0 => {
161+
// We're in the child. At this point we know we don't have multiple threads, so we
162+
// can safely `setns`.
163+
164+
// Open up the namespace of the target process as a file descriptor, and enter it.
165+
let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?;
166+
rustix::thread::move_into_link_name_space(
167+
pidlink.as_fd(),
168+
Some(rustix::thread::LinkNameSpaceType::Mount),
169+
)
170+
.context("setns")?;
171+
172+
// Open the target mount path as a file descriptor.
173+
let recursive = if recursive {
174+
OpenTreeFlags::AT_RECURSIVE
175+
} else {
176+
OpenTreeFlags::empty()
177+
};
178+
let fd = rustix::mount::open_tree(
179+
rustix::fs::CWD,
180+
path.as_std_path(),
181+
OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
182+
)
183+
.context("open_tree")?;
184+
185+
// And send that file descriptor via fd passing over the socketpair.
186+
let fd = fd.as_fd();
187+
let fds = [fd];
188+
let mut buffer = [0u8; rustix::cmsg_space!(ScmRights(1))];
189+
let mut control = SendAncillaryBuffer::new(&mut buffer);
190+
let pushed = control.push(SendAncillaryMessage::ScmRights(&fds));
191+
assert!(pushed);
192+
let ios = std::io::IoSlice::new(DUMMY_DATA);
193+
rustix::net::sendmsg(sock_child, &[ios], &mut control, SendFlags::empty())?;
194+
// Then we're done.
195+
std::process::exit(0)
196+
}
197+
-1 => {
198+
// fork failed
199+
let e = std::io::Error::last_os_error();
200+
anyhow::bail!("failed to fork: {e}");
201+
}
202+
n => {
203+
// We're in the parent; create a pid (checking that n > 0).
204+
let pid = rustix::process::Pid::from_raw(n).unwrap();
205+
// Receive the mount file descriptor from the child
206+
let mut cmsg_space = vec![0; rustix::cmsg_space!(ScmRights(1))];
207+
let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
208+
let mut buf = [0u8; DUMMY_DATA.len()];
209+
let iov = std::io::IoSliceMut::new(buf.as_mut());
210+
let mut iov = [iov];
211+
let nread = rustix::net::recvmsg(
212+
sock_parent,
213+
&mut iov,
214+
&mut cmsg_buffer,
215+
RecvFlags::CMSG_CLOEXEC,
216+
)
217+
.context("recvmsg")?
218+
.bytes;
219+
assert_eq!(nread, DUMMY_DATA.len());
220+
assert_eq!(buf, DUMMY_DATA);
221+
// And extract the file descriptor
222+
let r = cmsg_buffer
223+
.drain()
224+
.filter_map(|m| match m {
225+
rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
226+
_ => None,
227+
})
228+
.flatten()
229+
.next()
230+
.ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
231+
rustix::process::waitpid(Some(pid), WaitOptions::empty())?;
232+
Ok(r)
233+
}
234+
}
235+
}
236+
237+
/// Create a bind mount from the mount namespace of the target pid
238+
/// into our mount namespace.
239+
pub(crate) fn bind_mount_from_pidns(
240+
pid: Pid,
241+
src: &Utf8Path,
242+
target: &Utf8Path,
243+
recursive: bool,
244+
) -> Result<()> {
245+
let src = open_tree_from_pidns(pid, src, recursive)?;
246+
rustix::mount::move_mount(
247+
src.as_fd(),
248+
"",
249+
rustix::fs::CWD,
250+
target.as_std_path(),
251+
MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
252+
)
253+
.context("Moving mount")?;
254+
Ok(())
255+
}
256+
257+
// If the target path is not already mirrored from the host (e.g. via -v /dev:/dev)
258+
// then recursively mount it.
259+
pub(crate) fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
260+
let path = path.as_ref();
261+
// If we didn't have this in our filesystem already (e.g. for /var/lib/containers)
262+
// then create it now.
263+
std::fs::create_dir_all(path)?;
264+
if is_same_as_host(path)? {
265+
tracing::debug!("Already mounted from host: {path}");
266+
return Ok(());
267+
}
268+
tracing::debug!("Propagating host mount: {path}");
269+
bind_mount_from_pidns(Pid::from_raw(1).unwrap(), path, path, true)
270+
}

0 commit comments

Comments
 (0)