Skip to content

Commit 1af4932

Browse files
committed
install: Drop host bind mount code that never worked
It only appeared to kind of work because we ended up with a copy of the host rootfs for some reason. As the comment in the code indicates, what we're really trying to do here is the problem domain covered by https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html where we want to dynamically insert mounts into a running container. I was thinking about this and with some experimentation I realized that while we can't change the mount namespacing to make `/tmp` in our container be the same thing as the host `/tmp`, the "magic link" for `/proc/1/root` *will* traverse outside our mount namespace, so it works to just symlink `/tmp` -> `/proc/1/root/tmp`. It's not beautiful, but it works. Signed-off-by: Colin Walters <[email protected]>
1 parent 7b02233 commit 1af4932

File tree

1 file changed

+37
-28
lines changed

1 file changed

+37
-28
lines changed

lib/src/install.rs

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -382,23 +382,6 @@ mod config {
382382
}
383383
}
384384

385-
/// Bind mount the host path `src` onto `dest` as seen by this process.
///
/// Creates `dest` (including any missing parents) and then runs
/// `mount --bind -N <our pid> <src> <dest>` through `run_in_host_mountns`.
///
/// # Errors
/// Fails if `dest` cannot be created (annotated with "Creating {dest}") or if the
/// mount task returns an error.
///
/// NOTE(review): the commit that contains this hunk deletes this function; its
/// message states that this approach never actually worked.
fn bind_mount_from_host(src: impl AsRef<Utf8Path>, dest: impl AsRef<Utf8Path>) -> Result<()> {
386-
let src = src.as_ref();
387-
let dest = dest.as_ref();
388-
tracing::debug!("Mounting host {src} to {dest}");
389-
// The mount target must exist before we can mount onto it.
std::fs::create_dir_all(dest).with_context(|| format!("Creating {dest}"))?;
390-
// Here's the magic trick; modern versions of the `mount` command support a `-N` argument
391-
// to perform the mount in a distinct target namespace. But, what we want is the inverse
392-
// of this - we want to grab a host/root filesystem mount point. So we explicitly enter
393-
// the host's mount namespace, then give `mount` our own pid (from which it finds the mount namespace).
394-
let desc = format!("Bind mounting {src} from host");
395-
// Our own pid, rendered as a string so it can be passed as the `-N` argument.
let target = format!("{}", nix::unistd::getpid());
396-
// `run_in_host_mountns` is defined elsewhere; presumably it wraps `mount` so that it
// executes inside the host's mount namespace - TODO confirm against its definition.
Task::new_cmd(desc, run_in_host_mountns("mount"))
397-
.quiet()
398-
.args(["--bind", "-N", target.as_str(), src.as_str(), dest.as_str()])
399-
.run()
400-
}
401-
402385
#[context("Creating ostree deployment")]
403386
async fn initialize_ostree_root_from_self(
404387
state: &State,
@@ -682,6 +665,41 @@ fn require_systemd_pid1() -> Result<()> {
682665
Ok(())
683666
}
684667

668+
/// We want to have proper /tmp and /var/tmp without requiring the caller to set them up
669+
/// in advance by manually specifying them via `podman run -v /tmp:/tmp` etc.
670+
/// Unfortunately, it's quite complex right now to "gracefully" dynamically reconfigure
671+
/// the mount setup for a container. See https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html
672+
/// So the brutal hack we do here is to rely on the fact that we're running in the host
673+
/// pid namespace, and so the magic link for /proc/1/root will escape our mount namespace.
674+
/// We can't bind mount though - we need to symlink it so that each calling process
675+
/// will traverse the link.
676+
#[context("Linking tmp mounts to host")]
677+
pub(crate) fn propagate_tmp_mounts_to_host() -> Result<()> {
678+
// Point our /tmp and /var/tmp at the host, via the /proc/1/root magic link
679+
for path in ["/tmp", "/var/tmp"].map(Utf8Path::new) {
680+
let target = format!("/proc/1/root/{path}");
681+
let tmp = format!("{path}.tmp");
682+
// Ensure idempotence in case we're re-executed
683+
if path.is_symlink() {
684+
continue;
685+
}
686+
std::os::unix::fs::symlink(&target, &tmp)
687+
.with_context(|| format!("Symlinking {target} to {tmp}"))?;
688+
let cwd = rustix::fs::cwd();
689+
rustix::fs::renameat_with(
690+
cwd,
691+
path.as_os_str(),
692+
cwd,
693+
&tmp,
694+
rustix::fs::RenameFlags::EXCHANGE,
695+
)
696+
.with_context(|| format!("Exchanging {path} <=> {tmp}"))?;
697+
std::fs::rename(&tmp, format!("{path}.old"))
698+
.with_context(|| format!("Renaming old {tmp}"))?;
699+
}
700+
Ok(())
701+
}
702+
685703
/// Preparation for an install; validates and prepares some (thereafter immutable) global state.
686704
async fn prepare_install(
687705
config_opts: InstallConfigOpts,
@@ -695,21 +713,14 @@ async fn prepare_install(
695713
let container_info = crate::containerenv::get_container_execution_info()?;
696714
let source = SourceInfo::from_container(&container_info)?;
697715

716+
propagate_tmp_mounts_to_host()?;
717+
698718
// Even though we require running in a container, the mounts we create should be specific
699719
// to this process, so let's enter a private mountns to avoid leaking them.
700720
if std::env::var_os("BOOTC_SKIP_UNSHARE").is_none() {
701721
super::cli::ensure_self_unshared_mount_namespace().await?;
702722
}
703723

704-
// Let's ensure we have a tmpfs on /tmp, because we need that to write the SELinux label
705-
// (it won't work on the default overlayfs)
706-
if nix::sys::statfs::statfs("/tmp")?.filesystem_type() != nix::sys::statfs::TMPFS_MAGIC {
707-
Task::new("Creating tmpfs on /tmp", "mount")
708-
.quiet()
709-
.args(["-t", "tmpfs", "tmpfs", "/tmp"])
710-
.run()?;
711-
}
712-
713724
// Now, deal with SELinux state.
714725
let override_disable_selinux =
715726
reexecute_self_for_selinux_if_needed(&source, config_opts.disable_selinux)?;
@@ -719,8 +730,6 @@ async fn prepare_install(
719730
// Create our global (read-only) state which gets wrapped in an Arc
720731
// so we can pass it to worker threads too. Right now this just
721732
// combines our command line options along with some bind mounts from the host.
722-
// Overmount /var/tmp with the host's, so we can use it to share state
723-
bind_mount_from_host("/var/tmp", "/var/tmp")?;
724733
let state = Arc::new(State {
725734
override_disable_selinux,
726735
source,

0 commit comments

Comments
 (0)