Skip to content

Commit 5b51e6f

Browse files
authored
Merge pull request firecracker-microvm#432 from kzys/stopvm-cleanup
Close all Unix abstract sockets correctly
2 parents cdebca4 + 2d48838 commit 5b51e6f

File tree

2 files changed

+65
-16
lines changed

2 files changed

+65
-16
lines changed

firecracker-control/local.go

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import (
2121
"os/exec"
2222
"strconv"
2323
"strings"
24+
"sync"
2425
"syscall"
26+
"time"
2527

2628
"github.com/containerd/containerd/identifiers"
2729
"github.com/containerd/containerd/log"
@@ -30,6 +32,7 @@ import (
3032
"github.com/containerd/containerd/runtime/v2/shim"
3133
"github.com/containerd/containerd/sys"
3234
"github.com/golang/protobuf/ptypes/empty"
35+
"github.com/hashicorp/go-multierror"
3336
"github.com/pkg/errors"
3437
"github.com/sirupsen/logrus"
3538
"google.golang.org/grpc/codes"
@@ -47,6 +50,7 @@ import (
4750
var (
4851
_ fccontrolTtrpc.FirecrackerService = (*local)(nil)
4952
ttrpcAddressEnv = "TTRPC_ADDRESS"
53+
stopVMInterval = 10 * time.Millisecond
5054
)
5155

5256
func init() {
@@ -64,6 +68,9 @@ type local struct {
6468
containerdAddress string
6569
logger *logrus.Entry
6670
config *config.Config
71+
72+
processesMu sync.Mutex
73+
processes map[string]int32
6774
}
6875

6976
func newLocal(ic *plugin.InitContext) (*local, error) {
@@ -80,6 +87,7 @@ func newLocal(ic *plugin.InitContext) (*local, error) {
8087
containerdAddress: ic.Address,
8188
logger: log.G(ic.Context),
8289
config: cfg,
90+
processes: make(map[string]int32),
8391
}, nil
8492
}
8593

@@ -116,7 +124,7 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
116124

117125
shimSocket, err := shim.NewSocket(shimSocketAddress)
118126
if isEADDRINUSE(err) {
119-
return nil, status.Errorf(codes.AlreadyExists, "VM with ID %q already exists", id)
127+
return nil, status.Errorf(codes.AlreadyExists, "VM with ID %q already exists (socket: %q)", id, shimSocketAddress)
120128
} else if err != nil {
121129
err = errors.Wrapf(err, "failed to open shim socket at address %q", shimSocketAddress)
122130
s.logger.WithError(err).Error()
@@ -199,9 +207,17 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
199207
return nil, err
200208
}
201209

210+
s.addShim(shimSocketAddress, cmd)
211+
202212
return resp, nil
203213
}
204214

215+
func (s *local) addShim(address string, cmd *exec.Cmd) {
216+
s.processesMu.Lock()
217+
defer s.processesMu.Unlock()
218+
s.processes[address] = int32(cmd.Process.Pid)
219+
}
220+
205221
func (s *local) shimFirecrackerClient(requestCtx context.Context, vmID string) (*fcclient.Client, error) {
206222
if err := identifiers.Validate(vmID); err != nil {
207223
return nil, errors.Wrap(err, "invalid id")
@@ -224,16 +240,34 @@ func (s *local) StopVM(requestCtx context.Context, req *proto.StopVMRequest) (*e
224240
if err != nil {
225241
return nil, err
226242
}
227-
228243
defer client.Close()
229244

230-
resp, err := client.StopVM(requestCtx, req)
245+
resp, shimErr := client.StopVM(requestCtx, req)
246+
waitErr := s.waitForShimToExit(requestCtx, req.VMID)
247+
248+
// Assuming the shim is returning containerd's error code, return the error as is if possible.
249+
if waitErr == nil {
250+
return resp, shimErr
251+
}
252+
return resp, multierror.Append(shimErr, waitErr).ErrorOrNil()
253+
}
254+
255+
func (s *local) waitForShimToExit(ctx context.Context, vmID string) error {
256+
socketAddr, err := fcShim.SocketAddress(ctx, vmID)
231257
if err != nil {
232-
s.logger.WithError(err).Error()
233-
return nil, err
258+
return err
259+
}
260+
261+
s.processesMu.Lock()
262+
defer s.processesMu.Unlock()
263+
264+
pid, ok := s.processes[socketAddr]
265+
if !ok {
266+
return errors.Errorf("failed to find a shim process for %q", socketAddr)
234267
}
268+
defer delete(s.processes, socketAddr)
235269

236-
return resp, err
270+
return internal.WaitForPidToExit(ctx, stopVMInterval, pid)
237271
}
238272

239273
// GetVMInfo returns metadata for the VM with the given VMID.
@@ -387,6 +421,15 @@ func (s *local) newShim(ns, vmID, containerdAddress string, shimSocket *net.Unix
387421
logger.WithError(err).Error("shim has been unexpectedly terminated")
388422
}
389423
}
424+
425+
// Close all Unix abstract sockets.
426+
if err := shimSocketFile.Close(); err != nil {
427+
logger.WithError(err).Errorf("failed to close %q", shimSocketFile.Name())
428+
}
429+
if err := fcSocketFile.Close(); err != nil {
430+
logger.WithError(err).Errorf("failed to close %q", fcSocketFile.Name())
431+
}
432+
390433
if err := os.RemoveAll(shimDir.RootPath()); err != nil {
391434
logger.WithError(err).Errorf("failed to remove %q", shimDir.RootPath())
392435
}

runtime/service_integ_test.go

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,7 @@ func TestLongUnixSocketPath_Isolated(t *testing.T) {
583583
// default location we store state results in a path like
584584
// "/run/firecracker-containerd/default/<vmID>" (with len 112).
585585
const maxUnixSockLen = 108
586-
vmID := strings.Repeat("x", 72)
586+
vmID := strings.Repeat("x", 76)
587587

588588
ctx := namespaces.WithNamespace(context.Background(), "default")
589589

@@ -597,16 +597,14 @@ func TestLongUnixSocketPath_Isolated(t *testing.T) {
597597
{
598598
name: "Without Jailer",
599599
request: proto.CreateVMRequest{
600-
VMID: vmID + "noop",
600+
VMID: vmID,
601601
NetworkInterfaces: []*proto.FirecrackerNetworkInterface{},
602602
},
603603
},
604604
{
605605
name: "With Jailer",
606606
request: proto.CreateVMRequest{
607-
// We somehow cannot use the same VM ID here.
608-
// https://github.com/firecracker-microvm/firecracker-containerd/issues/409
609-
VMID: vmID + "jail",
607+
VMID: vmID,
610608
NetworkInterfaces: []*proto.FirecrackerNetworkInterface{},
611609
JailerConfig: &proto.JailerConfig{
612610
UID: 30000,
@@ -645,6 +643,14 @@ func TestLongUnixSocketPath_Isolated(t *testing.T) {
645643

646644
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: vmID})
647645
require.NoError(t, err)
646+
647+
matches, err := findProcess(ctx, findShim)
648+
require.NoError(t, err)
649+
require.Empty(t, matches)
650+
651+
matches, err = findProcess(ctx, findFirecracker)
652+
require.NoError(t, err)
653+
require.Empty(t, matches)
648654
})
649655
}
650656
}
@@ -1566,18 +1572,18 @@ func TestStopVM_Isolated(t *testing.T) {
15661572

15671573
test.stopFunc(ctx, fcClient, createVMRequest)
15681574

1569-
// If the function above uses StopVMRequest,
1570-
// shim guarantees that the underlying Firecracker process is dead
1575+
// If the function above uses StopVMRequest, all underlying processes must be dead
15711576
// (either gracefully or forcibly) at the end of the request.
15721577
if test.withStopVM {
15731578
fcExists, err := process.PidExists(fcProcess.Pid)
15741579
require.NoError(err, "failed to find firecracker")
15751580
require.False(fcExists, "firecracker %s is still there", vmID)
1581+
1582+
shimExists, err := process.PidExists(shimProcess.Pid)
1583+
require.NoError(err, "failed to find shim")
1584+
require.False(shimExists, "shim %s is still there", vmID)
15761585
}
15771586

1578-
// Since the shim is the one which is writing the response,
1579-
// it cannot guarantee that the shim itself is dead at the end of the request.
1580-
// But it should be dead eventually.
15811587
err = internal.WaitForPidToExit(ctx, time.Second, shimProcess.Pid)
15821588
require.NoError(err, "shim hasn't been terminated")
15831589
}

0 commit comments

Comments
 (0)