@@ -11,6 +11,7 @@ import (
1111 "runtime"
1212 "strconv"
1313 "sync"
14+ _ "unsafe" // for go:linkname
1415
1516 securejoin "github.com/cyphar/filepath-securejoin"
1617 "github.com/sirupsen/logrus"
@@ -53,14 +54,11 @@ func haveCloseRangeCloexec() bool {
5354 return haveCloseRangeCloexecBool
5455}
5556
56- // CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
57- // the process (except for those below the given fd value).
58- func CloseExecFrom (minFd int ) error {
59- if haveCloseRangeCloexec () {
60- err := unix .CloseRange (uint (minFd ), math .MaxUint , unix .CLOSE_RANGE_CLOEXEC )
61- return os .NewSyscallError ("close_range" , err )
62- }
57+ type fdFunc func (fd int ) // fdFunc is the per-descriptor callback taken by fdRangeFrom.
6358
59+ // fdRangeFrom calls the passed fdFunc for each file descriptor greater than or
60+ // equal to minFd that is open in the current process.
61+ func fdRangeFrom (minFd int , fn fdFunc ) error {
6462 procSelfFd , closer := ProcThreadSelf ("fd" )
6563 defer closer ()
6664
@@ -88,15 +86,67 @@ func CloseExecFrom(minFd int) error {
8886 if fd < minFd {
8987 continue
9088 }
91- // Intentionally ignore errors from unix.CloseOnExec -- the cases where
92- // this might fail are basically file descriptors that have already
93- // been closed (including and especially the one that was created when
94- // os.ReadDir did the "opendir" syscall).
95- unix .CloseOnExec (fd )
89+ // Ignore the file descriptor we used for readdir, as it will be closed
90+ // when we return.
91+ if uintptr (fd ) == fdDir .Fd () {
92+ continue
93+ }
94+ // Run the closure.
95+ fn (fd )
9696 }
9797 return nil
9898}
9999
100+ // CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
101+ // equal to minFd in the current process.
102+ func CloseExecFrom (minFd int ) error {
103+ // Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
104+ if haveCloseRangeCloexec () {
105+ err := unix .CloseRange (uint (minFd ), math .MaxUint , unix .CLOSE_RANGE_CLOEXEC )
106+ return os .NewSyscallError ("close_range" , err )
107+ }
108+ // Otherwise, fall back to the standard loop.
109+ return fdRangeFrom (minFd , unix .CloseOnExec )
110+ }
111+
112+ //go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
113+
114+ // runtime_IsPollDescriptor is bound via go:linkname to the Go-internal
115+ // "internal/poll".IsPollDescriptor so that we can recognise (and avoid closing)
116+ // the epoll descriptors the Go runtime uses. Yes, this is a Go runtime internal
117+ // thing, but there is no other way to be sure we only keep the file descriptors
118+ // the Go runtime needs. Hopefully nothing blows up doing this...
119+ func runtime_IsPollDescriptor (fd uintptr ) bool //nolint:revive
120+
121+ // UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
122+ // current process, except for those critical to Go's runtime (such as the
123+ // netpoll management descriptors).
124+ //
125+ // NOTE: That this function is incredibly dangerous to use in most Go code, as
126+ // closing file descriptors from underneath *os.File handles can lead to very
127+ // bad behaviour (the closed file descriptor can be re-used and then any
128+ // *os.File operations would apply to the wrong file). This function is only
129+ // intended to be called from the last stage of runc init.
130+ func UnsafeCloseFrom (minFd int ) error {
131+ // We cannot use close_range(2) even if it is available, because we must
132+ // not close some file descriptors.
133+ return fdRangeFrom (minFd , func (fd int ) {
134+ if runtime_IsPollDescriptor (uintptr (fd )) {
135+ // These are the Go runtimes internal netpoll file descriptors.
136+ // These file descriptors are operated on deep in the Go scheduler,
137+ // and closing those files from underneath Go can result in panics.
138+ // There is no issue with keeping them because they are not
139+ // executable and are not useful to an attacker anyway. Also we
140+ // don't have any choice.
141+ return
142+ }
143+ // There's nothing we can do about errors from close(2), and the
144+ // only likely error to be seen is EBADF which indicates the fd was
145+ // already closed (in which case, we got what we wanted).
146+ _ = unix .Close (fd )
147+ })
148+ }
149+
100150// NewSockPair returns a new SOCK_STREAM unix socket pair.
101151func NewSockPair (name string ) (parent , child * os.File , err error ) {
102152 fds , err := unix .Socketpair (unix .AF_LOCAL , unix .SOCK_STREAM | unix .SOCK_CLOEXEC , 0 )
0 commit comments