Skip to content

Commit b513929

Browse files
committed
Restore runc kill all behavior for init processes
PR #2538 removed the `runc kill --all` flag, when signaling containers. However, when cleaning up after the init process exists, the `--all` flag is still needed to remove any potentially orphaned processes when using runc before v1.2. See: opencontainers/runc@f8ad20f#diff-ade6035c3e554d7627cdc368b27f475fc0dad83e02382a1dea9cae9b75871087 Additionally, switch to using error strings directly from runc code in `internal\guest\runtime\runc\utils.go`: they have been available since runc v1.1.0-rc.1. See: opencontainers/runc#3033 Also, add logic to match on container not/still running error strings and return them for `Kill`, since returning `ERROR_VMCOMPUTE_SYSTEM_ALREADY_STOPPED` (`0xc0370110`) when killing a stopped container is expected behavior and handled appropriately in `"cmd/containerd-shim-runhcs-v1".(*hcsExec).Kill()`. Signed-off-by: Hamza El-Saawy <hamzaelsaawy@microsoft.com>
1 parent 31ea91d commit b513929

File tree

6 files changed

+45
-17
lines changed

6 files changed

+45
-17
lines changed

cmd/containerd-shim-runhcs-v1/task_hcs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all
446446
if signal == 0x9 && eid == "" && ht.host != nil {
447447
// If this is a SIGKILL against the init process we start a background
448448
// timer and wait on either the timer expiring or the process exiting
449-
// cleanly. If the timer exires first we forcibly close the UVM as we
449+
// cleanly. If the timer expires first we forcibly close the UVM as we
450450
// assume the guest is misbehaving for some reason.
451451
go func() {
452452
t := time.NewTimer(30 * time.Second)

internal/bridgeutils/gcserr/errors.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
// Hresult is a type corresponding to the HRESULT error type used on Windows.
1111
type Hresult int32
1212

13+
// ! Must match error values in internal\hcs\errors.go
1314
// from
1415
// - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/705fb797-2175-4a90-b5a3-3918024b10b8
1516
// - https://docs.microsoft.com/en-us/virtualization/api/hcs/reference/hcshresult
@@ -51,7 +52,7 @@ const (
5152
//
5253
// The virtual machine or container with the specified identifier is not
5354
// running.
54-
HrVmcomputeSystemAlreadyStopped = Hresult(-2143878896) // 0x80370110
55+
HrVmcomputeSystemAlreadyStopped = Hresult(-1070137072) // 0xC0370110
5556
)
5657

5758
// TODO: update implementation to use go1.13 style errors with `errors.As` and co.

internal/guest/runtime/hcsv2/container.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,10 @@ func (c *Container) GetAllProcessPids(ctx context.Context) ([]int, error) {
214214

215215
// Kill sends 'signal' to the container process.
216216
func (c *Container) Kill(ctx context.Context, signal syscall.Signal) error {
217-
log.G(ctx).WithField(logfields.ContainerID, c.id).Info("opengcs::Container::Kill")
217+
log.G(ctx).WithFields(logrus.Fields{
218+
logfields.ContainerID: c.id,
219+
"signal": signal.String(),
220+
}).Info("opengcs::Container::Kill")
218221
err := c.container.Kill(signal)
219222
if err != nil {
220223
return err

internal/guest/runtime/runc/container.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,40 @@ func (c *container) ExecProcess(process *oci.Process, stdioSet *stdio.Connection
7676

7777
// Kill sends the specified signal to the container's init process.
7878
func (c *container) Kill(signal syscall.Signal) error {
79-
logrus.WithField(logfields.ContainerID, c.id).Debug("runc::container::Kill")
79+
logrus.WithFields(logrus.Fields{
80+
logfields.ContainerID: c.id,
81+
"signal": signal.String(),
82+
}).Debug("runc::container::Kill")
83+
return c.kill(signal, false)
84+
}
85+
86+
// killAll terminates all processes started in the container.
87+
//
88+
// Note: [runc deprecated] the `kill --all` flag starting in v1.2, but, prior to that, it was required
89+
// to kill all processes within the container after the init exits.
90+
// Until we can guarantee that the runc version is greater than 1.1 and runc explicitly removes the option,
91+
// keep using it here.
92+
// This mirrors how upstream containerd's runc handles [init exit] via [kill all].
93+
//
94+
// [runc deprecated]: https://github.com/opencontainers/runc/pull/3825
95+
// [init exit]: https://github.com/containerd/containerd/blob/48baa31a0ad1ca1121ddaf968d3b8aa68c40bf84/cmd/containerd-shim-runc-v2/task/service.go#L725
96+
// [kill all]: https://github.com/containerd/containerd/blob/48baa31a0ad1ca1121ddaf968d3b8aa68c40bf84/cmd/containerd-shim-runc-v2/process/init.go#L375
97+
func (c *container) killAll() error {
98+
logrus.WithField(logfields.ContainerID, c.id).Debug("runc::container::killAll")
99+
return c.kill(syscall.SIGKILL, true)
100+
}
101+
102+
func (c *container) kill(signal syscall.Signal, all bool) error {
80103
args := []string{"kill"}
104+
if all {
105+
args = append(args, "--all")
106+
}
81107
args = append(args, c.id, strconv.Itoa(int(signal)))
82108
cmd := runcCommand(args...)
83109
out, err := cmd.CombinedOutput()
84110
if err != nil {
85111
runcErr := parseRuncError(string(out))
86-
return errors.Wrapf(runcErr, "unknown runc error after kill %v: %s", err, string(out))
112+
return errors.Wrapf(runcErr, "runc kill failed with %v: %s", err, string(out))
87113
}
88114
return nil
89115
}

internal/guest/runtime/runc/process.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
package runc
55

66
import (
7-
"syscall"
8-
97
"github.com/Microsoft/hcsshim/internal/guest/runtime"
108
"github.com/Microsoft/hcsshim/internal/guest/stdio"
119
"github.com/Microsoft/hcsshim/internal/logfields"
@@ -65,7 +63,7 @@ func (p *process) Wait() (int, error) {
6563
// If the init process of a pid namespace terminates, the kernel
6664
// terminates all other processes in the namespace with SIGKILL. We
6765
// simulate the same behavior.
68-
if err := p.c.Kill(syscall.SIGKILL); err != nil {
66+
if err := p.c.killAll(); err != nil {
6967
l.WithError(err).Error("failed to terminate container after process wait")
7068
}
7169
}

internal/guest/runtime/runc/utils.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"strings"
1313
"syscall"
1414

15+
"github.com/opencontainers/runc/libcontainer"
1516
"github.com/pkg/errors"
1617
"github.com/sirupsen/logrus"
1718

@@ -124,20 +125,19 @@ func (l *standardLogEntry) asError() (err error) {
124125
}
125126

126127
func parseRuncError(s string) (err error) {
127-
// TODO (helsaawy): match with errors from
128-
// https://github.com/opencontainers/runc/blob/master/libcontainer/error.go
129128
if strings.HasPrefix(s, "container") && strings.HasSuffix(s, "does not exist") {
130-
// currently: "container <container id> does not exist"
129+
// match "container %q does not exist" and [libcontainer.ErrNotExist]
131130
err = runtime.ErrContainerDoesNotExist
132-
} else if strings.Contains(s, "container with id exists") ||
133-
strings.Contains(s, "container with given ID already exists") {
131+
} else if strings.Contains(s, "container with id exists") || strings.Contains(s, libcontainer.ErrExist.Error()) {
134132
err = runtime.ErrContainerAlreadyExists
135-
} else if strings.Contains(s, "invalid id format") ||
136-
strings.Contains(s, "invalid container ID format") {
133+
} else if strings.Contains(s, "invalid id format") || strings.Contains(s, libcontainer.ErrInvalidID.Error()) {
137134
err = runtime.ErrInvalidContainerID
138-
} else if strings.Contains(s, "container") &&
139-
strings.Contains(s, "that is not stopped") {
135+
} else if strings.Contains(s, "container") && strings.Contains(s, "that is not stopped") {
140136
err = runtime.ErrContainerNotStopped
137+
} else if strings.Contains(s, libcontainer.ErrRunning.Error()) {
138+
err = runtime.ErrContainerStillRunning
139+
} else if strings.Contains(s, libcontainer.ErrNotRunning.Error()) {
140+
err = runtime.ErrContainerNotRunning
141141
} else {
142142
err = errors.New(s)
143143
}

0 commit comments

Comments
 (0)