Skip to content

Commit 3ddde27

Browse files
committed
init: move close(stateDirFd) before seccomp apply
This further reduces the number of syscalls that a user needs to enable in their seccomp profile. Signed-off-by: Aleksa Sarai <[email protected]>
1 parent 1c81e2a commit 3ddde27

File tree

1 file changed

+24
-16
lines changed

1 file changed

+24
-16
lines changed

libcontainer/standard_init_linux.go

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,28 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
3030
var newperms uint32
3131

3232
if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
33-
// with user ns we need 'other' search permissions
33+
// With user ns we need 'other' search permissions.
3434
newperms = 0x8
3535
} else {
36-
// without user ns we need 'UID' search permissions
36+
// Without user ns we need 'UID' search permissions.
3737
newperms = 0x80000
3838
}
3939

40-
// create a unique per session container name that we can
41-
// join in setns; however, other containers can also join it
40+
// Create a unique per session container name that we can join in setns;
41+
// however, other containers can also join it.
4242
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
4343
}
4444

4545
func (l *linuxStandardInit) Init() error {
4646
if !l.config.Config.NoNewKeyring {
4747
ringname, keepperms, newperms := l.getSessionRingParams()
4848

49-
// do not inherit the parent's session keyring
49+
// Do not inherit the parent's session keyring.
5050
sessKeyId, err := keys.JoinSessionKeyring(ringname)
5151
if err != nil {
5252
return err
5353
}
54-
// make session keyring searcheable
54+
// Make session keyring searchable.
5555
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
5656
return err
5757
}
@@ -150,39 +150,47 @@ func (l *linuxStandardInit) Init() error {
150150
if err := pdeath.Restore(); err != nil {
151151
return err
152152
}
153-
// compare the parent from the initial start of the init process and make sure that it did not change.
154-
// if the parent changes that means it died and we were reparented to something else so we should
155-
// just kill ourself and not cause problems for someone else.
153+
// Compare the parent from the initial start of the init process and make
154+
// sure that it did not change. If the parent changes that means it died
155+
// and we were reparented to something else so we should just kill ourselves
156+
// and not cause problems for someone else.
156157
if unix.Getppid() != l.parentPid {
157158
return unix.Kill(unix.Getpid(), unix.SIGKILL)
158159
}
159-
// check for the arg before waiting to make sure it exists and it is returned
160-
// as a create time error.
160+
// Check for the arg before waiting to make sure it exists and it is
161+
// returned as a create time error.
161162
name, err := exec.LookPath(l.config.Args[0])
162163
if err != nil {
163164
return err
164165
}
165-
// close the pipe to signal that we have completed our init.
166+
// Close the pipe to signal that we have completed our init.
166167
l.pipe.Close()
167168
// Wait for the FIFO to be opened on the other side before exec-ing the
168169
// user process. We open it through /proc/self/fd/$fd, because the fd that
169170
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
170171
// re-open an O_PATH fd through /proc.
171172
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
172173
if err != nil {
173-
return newSystemErrorWithCause(err, "openat exec fifo")
174+
return newSystemErrorWithCause(err, "open exec fifo")
174175
}
175176
if _, err := unix.Write(fd, []byte("0")); err != nil {
176177
return newSystemErrorWithCause(err, "write 0 exec fifo")
177178
}
179+
// Close the O_PATH fifo fd before exec because the kernel resets
180+
// dumpable in the wrong order. This has been fixed in newer kernels, but
181+
// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
182+
// N.B. the core issue itself (passing dirfds to the host filesystem) has
183+
// since been resolved.
184+
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
185+
unix.Close(l.fifoFd)
186+
// Set seccomp as close to execve as possible, so as few syscalls take
187+
// place afterward (reducing the number of syscalls that users need to
188+
// enable in their seccomp profiles).
178189
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
179190
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
180191
return newSystemErrorWithCause(err, "init seccomp")
181192
}
182193
}
183-
// close the statedir fd before exec because the kernel resets dumpable in the wrong order
184-
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
185-
unix.Close(l.fifoFd)
186194
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
187195
return newSystemErrorWithCause(err, "exec user process")
188196
}

0 commit comments

Comments
 (0)