Skip to content

Commit bc9e857

Browse files
committed
executor: detect containers killed by OOMKiller
If a container exits with an error and has invoked the OOM killer, mark the original error as ENOMEM so that it can be detected on the client side. gRPC will report ENOMEM as codes.ResourceExhausted based on moby#5182 Signed-off-by: Tonis Tiigi <[email protected]>
1 parent b04830b commit bc9e857

File tree

2 files changed

+52
-3
lines changed

2 files changed

+52
-3
lines changed

executor/runcexecutor/executor.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
335335
}
336336
doReleaseNetwork = false
337337

338-
err = exitError(ctx, err)
338+
err = exitError(ctx, cgroupPath, err)
339339
if err != nil {
340340
if rec != nil {
341341
rec.Close()
@@ -351,7 +351,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
351351
return rec, rec.CloseAsync(releaseContainer)
352352
}
353353

354-
func exitError(ctx context.Context, err error) error {
354+
func exitError(ctx context.Context, cgroupPath string, err error) error {
355355
if err != nil {
356356
exitErr := &gatewayapi.ExitError{
357357
ExitCode: gatewayapi.UnknownExitStatus,
@@ -363,6 +363,9 @@ func exitError(ctx context.Context, err error) error {
363363
ExitCode: uint32(runcExitError.Status),
364364
}
365365
}
366+
367+
detectOOM(ctx, cgroupPath, exitErr)
368+
366369
trace.SpanFromContext(ctx).AddEvent(
367370
"Container exited",
368371
trace.WithAttributes(
@@ -453,7 +456,7 @@ func (w *runcExecutor) Exec(ctx context.Context, id string, process executor.Pro
453456
}
454457

455458
err = w.exec(ctx, id, spec.Process, process, nil)
456-
return exitError(ctx, err)
459+
return exitError(ctx, "", err)
457460
}
458461

459462
type forwardIO struct {

executor/runcexecutor/executor_linux.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
package runcexecutor
22

33
import (
4+
"bufio"
45
"context"
56
"io"
67
"os"
8+
"path/filepath"
9+
"strconv"
10+
"strings"
711
"syscall"
812

913
"github.com/containerd/console"
1014
runc "github.com/containerd/go-runc"
1115
"github.com/moby/buildkit/executor"
16+
gatewayapi "github.com/moby/buildkit/frontend/gateway/pb"
1217
"github.com/moby/buildkit/util/bklog"
1318
"github.com/moby/sys/signal"
1419
"github.com/opencontainers/runtime-spec/specs-go"
@@ -172,3 +177,44 @@ func (w *runcExecutor) callWithIO(ctx context.Context, process executor.ProcessI
172177

173178
return call(ctx, startedCh, runcIO, killer.pidfile)
174179
}
180+
181+
func detectOOM(ctx context.Context, ns string, gwErr *gatewayapi.ExitError) {
182+
const defaultCgroupMountpoint = "/sys/fs/cgroup"
183+
184+
if ns == "" {
185+
return
186+
}
187+
188+
count, err := readMemoryEvent(filepath.Join(defaultCgroupMountpoint, ns), "oom_kill")
189+
if err != nil {
190+
bklog.G(ctx).WithError(err).Warn("failed to read oom_kill event")
191+
return
192+
}
193+
if count > 0 {
194+
gwErr.Err = syscall.ENOMEM
195+
}
196+
}
197+
198+
// readMemoryEvent returns the counter named event from the "memory.events"
// file inside the directory fp.
//
// The file is scanned line by line. Lines that do not consist of exactly two
// whitespace-separated fields, or whose first field is not event, are skipped.
// The first matching line decides the result.
//
// It returns an error if the file cannot be opened, if the matching counter is
// not a valid unsigned base-10 integer, or if scanning the file fails. When no
// line matches, it returns 0 and a nil error.
func readMemoryEvent(fp string, event string) (uint64, error) {
	f, err := os.Open(filepath.Join(fp, "memory.events"))
	if err != nil {
		return 0, err
	}
	defer f.Close()

	s := bufio.NewScanner(f)
	for s.Scan() {
		parts := strings.Fields(s.Text())
		if len(parts) != 2 || parts[0] != event {
			continue
		}
		// Report a malformed counter instead of dropping the parse error;
		// otherwise the caller would see 0 with a nil error and could not
		// tell corrupt data apart from "event never happened".
		v, err := strconv.ParseUint(parts[1], 10, 64)
		if err != nil {
			return 0, err
		}
		return v, nil
	}
	return 0, s.Err()
}

0 commit comments

Comments
 (0)