Skip to content

Commit 5168f15

Browse files
Merge pull request #110 from Roblox/stdout_stderr_issue
Fix: Stderr and stdout of existing processes are lost after a restart of nomad
2 parents 42df527 + 7f50f0f commit 5168f15

File tree

4 files changed

+51
-9
lines changed

4 files changed

+51
-9
lines changed

containerd/containerd.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -341,12 +341,7 @@ func (d *Driver) loadContainer(id string) (containerd.Container, error) {
341341
}
342342

343343
func (d *Driver) createTask(container containerd.Container, stdoutPath, stderrPath string) (containerd.Task, error) {
344-
stdout, err := openFIFO(stdoutPath)
345-
if err != nil {
346-
return nil, err
347-
}
348-
349-
stderr, err := openFIFO(stderrPath)
344+
stdout, stderr, err := getStdoutStderrFifos(stdoutPath, stderrPath)
350345
if err != nil {
351346
return nil, err
352347
}
@@ -357,9 +352,14 @@ func (d *Driver) createTask(container containerd.Container, stdoutPath, stderrPa
357352
return container.NewTask(ctxWithTimeout, cio.NewCreator(cio.WithStreams(nil, stdout, stderr)))
358353
}
359354

360-
func (d *Driver) getTask(container containerd.Container) (containerd.Task, error) {
355+
func (d *Driver) getTask(container containerd.Container, stdoutPath, stderrPath string) (containerd.Task, error) {
356+
stdout, stderr, err := getStdoutStderrFifos(stdoutPath, stderrPath)
357+
if err != nil {
358+
return nil, err
359+
}
360+
361361
ctxWithTimeout, cancel := context.WithTimeout(d.ctxContainerd, 30*time.Second)
362362
defer cancel()
363363

364-
return container.Task(ctxWithTimeout, cio.Load)
364+
return container.Task(ctxWithTimeout, cio.NewAttach(cio.WithStreams(nil, stdout, stderr)))
365365
}

containerd/driver.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ type TaskConfig struct {
207207
// TaskState is the driver-specific state that StartTask stores on the task
// handle (via SetDriverState) and that RecoverTask reads back when
// re-attaching to an existing container.
type TaskState struct {
208208
// StartedAt is the task start time, copied from the task handle's startedAt.
StartedAt time.Time
209209
// ContainerName is the name of the container created for the task.
ContainerName string
210+
// StdoutPath is the task's stdout path from the task config; RecoverTask
// passes it to getTask so stdout can be re-attached.
StdoutPath string
211+
// StderrPath is the task's stderr path from the task config; RecoverTask
// passes it to getTask so stderr can be re-attached.
StderrPath string
210212
}
211213

212214
type Driver struct {
@@ -496,6 +498,8 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
496498
driverState := TaskState{
497499
StartedAt: h.startedAt,
498500
ContainerName: containerName,
501+
StdoutPath: cfg.StdoutPath,
502+
StderrPath: cfg.StderrPath,
499503
}
500504

501505
if err := handle.SetDriverState(&driverState); err != nil {
@@ -539,7 +543,7 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
539543
return fmt.Errorf("Error in recovering container: %v", err)
540544
}
541545

542-
task, err := d.getTask(container)
546+
task, err := d.getTask(container, taskState.StdoutPath, taskState.StderrPath)
543547
if err != nil {
544548
return fmt.Errorf("Error in recovering task: %v", err)
545549
}

containerd/utils.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,20 @@ func buildMountpoint(mountType, mountTarget, mountSource string, mountOptions []
3737
return m
3838
}
3939

40+
// getStdoutStderrFifos return the container's stdout and stderr FIFO's.
41+
func getStdoutStderrFifos(stdoutPath, stderrPath string) (*os.File, *os.File, error) {
42+
stdout, err := openFIFO(stdoutPath)
43+
if err != nil {
44+
return nil, nil, err
45+
}
46+
47+
stderr, err := openFIFO(stderrPath)
48+
if err != nil {
49+
return nil, nil, err
50+
}
51+
return stdout, stderr, nil
52+
}
53+
4054
// FIFOs are named pipes in Linux.
4155
// openFIFO() opens the nomad task stdout/stderr pipes and returns the fd.
4256
func openFIFO(path string) (*os.File, error) {

tests/run_tests.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ main() {
2929
setup
3030
echo "INFO: Checking if nomad-driver-containerd is up and running, and nomad is ready to accept jobs."
3131
is_containerd_driver_active
32+
is_nomad_ready
3233

3334
run_tests $@
3435
exit $PASS_STATUS
@@ -202,4 +203,27 @@ is_containerd_driver_active() {
202203
fi
203204
}
204205

206+
# is_nomad_ready polls the local nomad HTTP API until the first node in
# /v1/nodes reports the status "ready", so that tests are not started before
# nomad can accept jobs.
#
# It makes up to 5 attempts and sleeps 4 seconds after every failed attempt.
# Returns 0 as soon as nomad is ready; terminates the script with exit
# status 1 if nomad is still not ready after the last attempt.
is_nomad_ready() {
  # Keep the loop state local so it cannot clobber same-named globals.
  local attempt=0
  local status rc

  while [ "$attempt" -lt 5 ]; do
    # A failing probe must not abort the script while nomad is still coming
    # up, so errexit is suspended around it and the exit status is checked
    # explicitly below.
    set +e
    status=$(curl -s http://127.0.0.1:4646/v1/nodes|jq '.[0] ."Status"')
    rc=$?
    set -e
    # jq prints the JSON string including its double quotes, hence the
    # escaped quotes in the comparison.
    if [[ $rc -eq 0 && $status = \"ready\" ]]; then
      echo "INFO: nomad is ready to accept jobs."
      return 0
    fi
    echo "INFO: nomad is initializing, sleep for 4 seconds."
    sleep 4s
    # $((...)) replaces the obsolete $[...] arithmetic expansion.
    attempt=$((attempt + 1))
  done

  # Every attempt failed.
  echo "ERROR: nomad didn't come up. exit 1."
  exit 1
}
228+
205229
main "$@"

0 commit comments

Comments
 (0)