Skip to content

Commit 8d3e6c9

Browse files
williammartinCraig Furman
authored andcommitted
Avoid race when opening exec fifo
When starting a container with `runc start` or `runc run`, the stub process (runc[2:INIT]) opens a fifo for writing. Its parent runc process will open the same fifo for reading. In this way, they synchronize. If the stub process exits at the wrong time, the parent runc process will block forever. This can happen when racing 2 runc operations against each other: `runc run/start`, and `runc delete`. It could also happen for other reasons, e.g. the kernel's OOM killer may select the stub process. This commit resolves this race by racing the opening of the exec fifo from the runc parent process against the stub process exiting. If the stub process exits before we open the fifo, we return an error. Another solution is to wait on the stub process. However, it seems it would require more refactoring to avoid calling wait multiple times on the same process, which is an error. Signed-off-by: Craig Furman <[email protected]>
1 parent ab4a819 commit 8d3e6c9

File tree

1 file changed

+60
-9
lines changed

1 file changed

+60
-9
lines changed

libcontainer/container_linux.go

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package libcontainer
55
import (
66
"bytes"
77
"encoding/json"
8+
"errors"
89
"fmt"
910
"io"
1011
"io/ioutil"
@@ -267,20 +268,70 @@ func (c *linuxContainer) Exec() error {
267268

268269
func (c *linuxContainer) exec() error {
269270
path := filepath.Join(c.root, execFifoFilename)
270-
f, err := os.OpenFile(path, os.O_RDONLY, 0)
271-
if err != nil {
272-
return newSystemErrorWithCause(err, "open exec fifo for reading")
271+
272+
fifoOpen := make(chan struct{})
273+
select {
274+
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
275+
return errors.New("container process is already dead")
276+
case result := <-awaitFifoOpen(path):
277+
close(fifoOpen)
278+
if result.err != nil {
279+
return result.err
280+
}
281+
f := result.file
282+
defer f.Close()
283+
if err := readFromExecFifo(f); err != nil {
284+
return err
285+
}
286+
return os.Remove(path)
273287
}
274-
defer f.Close()
275-
data, err := ioutil.ReadAll(f)
288+
}
289+
290+
func readFromExecFifo(execFifo io.Reader) error {
291+
data, err := ioutil.ReadAll(execFifo)
276292
if err != nil {
277293
return err
278294
}
279-
if len(data) > 0 {
280-
os.Remove(path)
281-
return nil
295+
if len(data) <= 0 {
296+
return fmt.Errorf("cannot start an already running container")
282297
}
283-
return fmt.Errorf("cannot start an already running container")
298+
return nil
299+
}
300+
301+
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
302+
isDead := make(chan struct{})
303+
go func() {
304+
for {
305+
select {
306+
case <-exit:
307+
return
308+
case <-time.After(time.Millisecond * 100):
309+
stat, err := system.Stat(pid)
310+
if err != nil || stat.State == system.Zombie {
311+
close(isDead)
312+
return
313+
}
314+
}
315+
}
316+
}()
317+
return isDead
318+
}
319+
320+
func awaitFifoOpen(path string) <-chan openResult {
321+
fifoOpened := make(chan openResult)
322+
go func() {
323+
f, err := os.OpenFile(path, os.O_RDONLY, 0)
324+
if err != nil {
325+
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
326+
}
327+
fifoOpened <- openResult{file: f}
328+
}()
329+
return fifoOpened
330+
}
331+
332+
type openResult struct {
333+
file *os.File
334+
err error
284335
}
285336

286337
func (c *linuxContainer) start(process *Process, isInit bool) error {

0 commit comments

Comments
 (0)