Skip to content

Commit f8e6b5a

Browse files
committed
rootfs: make pivot_root not use a temporary directory
Namely, use an undocumented feature of pivot_root(2): pivot_root(".", ".") is actually valid, and it leaves the old root tied to your /proc/self/cwd in a way that makes unmounting easy. Thanks a lot to the LXC developers, who came up with this idea first. This is the first of many steps towards allowing runC to work with a completely read-only rootfs. Signed-off-by: Aleksa Sarai <[email protected]>
1 parent 88b4c48 commit f8e6b5a

File tree

2 files changed

+46
-33
lines changed

2 files changed

+46
-33
lines changed

libcontainer/configs/config.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,6 @@ type Config struct {
8585
// that the parent process dies.
8686
ParentDeathSignal int `json:"parent_death_signal"`
8787

88-
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
89-
// When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
90-
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
91-
PivotDir string `json:"pivot_dir"`
92-
9388
// Path to a directory containing the container's root filesystem.
9489
Rootfs string `json:"rootfs"`
9590

libcontainer/rootfs_linux.go

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
8383
if config.NoPivotRoot {
8484
err = msMoveRoot(config.Rootfs)
8585
} else {
86-
err = pivotRoot(config.Rootfs, config.PivotDir)
86+
err = pivotRoot(config.Rootfs)
8787
}
8888
if err != nil {
8989
return newSystemErrorWithCause(err, "jailing process inside rootfs")
@@ -563,48 +563,66 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
563563
return nil
564564
}
565565

566-
func pivotRoot(rootfs, pivotBaseDir string) (err error) {
567-
if pivotBaseDir == "" {
568-
pivotBaseDir = "/"
569-
}
570-
tmpDir := filepath.Join(rootfs, pivotBaseDir)
571-
if err := os.MkdirAll(tmpDir, 0755); err != nil {
572-
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
566+
// pivotRoot will call pivot_root such that rootfs becomes the new root
567+
// filesystem, and everything else is cleaned up.
568+
func pivotRoot(rootfs string) error {
569+
// While the documentation may claim otherwise, pivot_root(".", ".") is
570+
// actually valid. What this results in is / being the new root but
571+
// /proc/self/cwd being the old root. Since we can play around with the cwd
572+
// with pivot_root this allows us to pivot without creating directories in
573+
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
574+
575+
oldroot, err := syscall.Open("/", syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
576+
if err != nil {
577+
return err
573578
}
574-
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
579+
defer syscall.Close(oldroot)
580+
581+
newroot, err := syscall.Open(rootfs, syscall.O_DIRECTORY|syscall.O_RDONLY, 0)
575582
if err != nil {
576-
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
583+
return err
577584
}
578-
defer func() {
579-
errVal := os.Remove(pivotDir)
580-
if err == nil {
581-
err = errVal
582-
}
583-
}()
584-
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
585+
defer syscall.Close(newroot)
586+
587+
// Change to the new root so that the pivot_root actually acts on it.
588+
if err := syscall.Fchdir(newroot); err != nil {
589+
return err
590+
}
591+
592+
if err := syscall.PivotRoot(".", "."); err != nil {
585593
// Make the parent mount private
586-
if err := rootfsParentMountPrivate(rootfs); err != nil {
594+
if err := rootfsParentMountPrivate("."); err != nil {
587595
return err
588596
}
597+
589598
// Try again
590-
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
599+
if err := syscall.PivotRoot(".", "."); err != nil {
591600
return fmt.Errorf("pivot_root %s", err)
592601
}
593602
}
594-
if err := syscall.Chdir("/"); err != nil {
595-
return fmt.Errorf("chdir / %s", err)
603+
604+
// Currently our "." is oldroot (according to the current kernel code).
605+
// However, purely for safety, we will fchdir(oldroot) since there isn't
606+
// really any guarantee from the kernel what /proc/self/cwd will be after a
607+
// pivot_root(2).
608+
609+
if err := syscall.Fchdir(oldroot); err != nil {
610+
return err
596611
}
597-
// path to pivot dir now changed, update
598-
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
599612

600-
// Make pivotDir rprivate to make sure any of the unmounts don't
601-
// propagate to parent.
602-
if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
613+
// Make oldroot rprivate to make sure our unmounts don't propagate to the
614+
// host (and thus bork the machine).
615+
if err := syscall.Mount("", ".", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
616+
return err
617+
}
618+
// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
619+
if err := syscall.Unmount(".", syscall.MNT_DETACH); err != nil {
603620
return err
604621
}
605622

606-
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
607-
return fmt.Errorf("unmount pivot_root dir %s", err)
623+
// Switch back to our shiny new root.
624+
if err := syscall.Chdir("/"); err != nil {
625+
return fmt.Errorf("chdir / %s", err)
608626
}
609627
return nil
610628
}

0 commit comments

Comments
 (0)