Skip to content

Commit ad547ec

Browse files
committed
Remove a shim directory on StopVM() correctly and synchronously
Previously the cleanup logic is in firecracker-control, but it doesn't work correctly with a runc-based jailer, since the jailer is making files without namespaces. Morever the cleanup logic is running after StopVM() that makes our contracts unclear. We should guarantee all resource are deleted at the end of StopVM(). Signed-off-by: Kazuyoshi Kato <[email protected]>
1 parent 4081b0b commit ad547ec

File tree

4 files changed

+52
-12
lines changed

4 files changed

+52
-12
lines changed

runtime/jailer_integ_test.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@ package main
1515

1616
import (
1717
"context"
18+
"os"
19+
"path/filepath"
1820
"testing"
1921

2022
"github.com/containerd/containerd"
2123
"github.com/containerd/containerd/namespaces"
2224
"github.com/containerd/containerd/oci"
2325
"github.com/containerd/containerd/pkg/ttrpcutil"
26+
"github.com/stretchr/testify/assert"
2427
"github.com/stretchr/testify/require"
2528

2629
_ "github.com/firecracker-microvm/firecracker-containerd/firecracker-control"
@@ -78,10 +81,19 @@ func testJailer(t *testing.T, jailerConfig *proto.JailerConfig) {
7881
stdout := startAndWaitTask(ctx, t, c)
7982
require.Equal("hello", stdout)
8083

81-
defer func() {
82-
err := c.Delete(ctx, containerd.WithSnapshotCleanup)
83-
require.NoError(err, "failed to delete a container")
84-
}()
84+
stat, err := os.Stat(filepath.Join(shimBaseDir(), "default", vmID))
85+
require.NoError(err)
86+
assert.True(t, stat.IsDir())
87+
88+
err = c.Delete(ctx, containerd.WithSnapshotCleanup)
89+
require.NoError(err, "failed to delete a container")
90+
91+
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: vmID})
92+
require.NoError(err)
93+
94+
_, err = os.Stat(filepath.Join(shimBaseDir(), "default", vmID))
95+
assert.Error(t, err)
96+
assert.True(t, os.IsNotExist(err))
8597
}
8698

8799
func TestJailerCPUSet_Isolated(t *testing.T) {

runtime/noop_jailer.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,4 +118,6 @@ func (j *noopJailer) Stop() error {
118118
return err
119119
}
120120

121-
func (j *noopJailer) Close() error { return nil }
121+
func (j *noopJailer) Close() error {
122+
return os.RemoveAll(j.shimDir.RootPath())
123+
}

runtime/runc_jailer.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ import (
2323
"path/filepath"
2424
"strings"
2525
"syscall"
26+
"time"
2627

2728
"github.com/containerd/go-runc"
2829
"github.com/firecracker-microvm/firecracker-go-sdk"
30+
"github.com/hashicorp/go-multierror"
2931
"github.com/opencontainers/runtime-spec/specs-go"
3032
"github.com/pkg/errors"
3133
"github.com/sirupsen/logrus"
@@ -466,9 +468,18 @@ func (j *runcJailer) setDefaultConfigValues(cfg *config.Config, socketPath strin
466468
// Close will cleanup the container that may be left behind if the jailing
467469
// process was killed via SIGKILL.
468470
func (j *runcJailer) Close() error {
469-
return j.runcClient.Delete(j.ctx, j.vmID, &runc.DeleteOpts{
470-
Force: true,
471-
})
471+
// Even the jailer's associated context is cancelled,
472+
// we'd like to do the cleanups below just in case.
473+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
474+
defer cancel()
475+
476+
// Delete the container, if it is still running.
477+
runcErr := j.runcClient.Delete(ctx, j.vmID, &runc.DeleteOpts{Force: true})
478+
479+
// Regardless of the result, remove the directory.
480+
removeErr := os.RemoveAll(j.OCIBundlePath())
481+
482+
return multierror.Append(runcErr, removeErr).ErrorOrNil()
472483
}
473484

474485
func mkdirAndChown(path string, mode os.FileMode, uid, gid uint32) error {

runtime/service.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,13 @@ func (s *service) createVM(requestCtx context.Context, request *proto.CreateVMRe
492492
}
493493
}()
494494

495+
namespace, ok := namespaces.Namespace(s.shimCtx)
496+
if !ok {
497+
namespace = namespaces.Default
498+
}
499+
495500
s.logger.Info("creating new VM")
496-
s.jailer, err = newJailer(s.shimCtx, s.logger, filepath.Join(s.config.ShimBaseDir, s.vmID), s, request)
501+
s.jailer, err = newJailer(s.shimCtx, s.logger, filepath.Join(s.config.ShimBaseDir, namespace, s.vmID), s, request)
497502
if err != nil {
498503
return errors.Wrap(err, "failed to create jailer")
499504
}
@@ -1165,11 +1170,21 @@ func (s *service) shutdown(
11651170
s.shutdownLoop(requestCtx, timeout, req)
11661171
}()
11671172

1168-
err := s.machine.Wait(context.Background())
1169-
if err == nil {
1173+
var result *multierror.Error
1174+
if err := s.machine.Wait(context.Background()); err != nil {
1175+
result = multierror.Append(result, err)
1176+
}
1177+
if err := s.jailer.Close(); err != nil {
1178+
result = multierror.Append(result, err)
1179+
}
1180+
if result == nil {
11701181
return nil
11711182
}
1172-
return status.Error(codes.Internal, fmt.Sprintf("the VMM was killed forcibly: %v", err))
1183+
1184+
if err := result.ErrorOrNil(); err != nil {
1185+
return status.Error(codes.Internal, fmt.Sprintf("the VMM was killed forcibly: %v", err))
1186+
}
1187+
return nil
11731188
}
11741189

11751190
// shutdownLoop sends multiple different shutdown requests to stop the VMM.

0 commit comments

Comments
 (0)