Skip to content

Commit e3e2fb5

Browse files
committed
StopVM to do forceful shutdown if VM is in paused state
StopVM calls an agent inside the VM. If the VM is paused StopVM can be stuck as it cannot connect to the agent. To fix this, we implemented api in go-sdk to read instance state and here we call it internally to read the VM state. If it is paused then we do a forceful shutdown of the VM. Signed-off-by: Alakesh Haloi <[email protected]>
1 parent d090e38 commit e3e2fb5

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

runtime/service.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,8 @@ func (s *service) mountDrives(requestCtx context.Context) error {
636636

637637
// StopVM will shutdown the VMM. Unlike Shutdown, this method is exposed to containerd clients.
638638
// If the VM has not been created yet and the timeout is hit waiting for it to exist, an error will be returned
639-
// but the shim will continue to shutdown.
639+
// but the shim will continue to shutdown. Similarly if we detect that the VM is in pause state, then
640+
// we are unable to communicate to the in-VM agent. In this case, we do a forceful shutdown.
640641
func (s *service) StopVM(requestCtx context.Context, request *proto.StopVMRequest) (_ *empty.Empty, err error) {
641642
defer logPanicAndDie(s.logger)
642643
s.logger.WithFields(logrus.Fields{"timeout_seconds": request.TimeoutSeconds}).Debug("StopVM")
@@ -646,6 +647,20 @@ func (s *service) StopVM(requestCtx context.Context, request *proto.StopVMReques
646647
timeout = time.Duration(request.TimeoutSeconds) * time.Second
647648
}
648649

650+
info, err := s.machine.DescribeInstanceInfo(requestCtx)
651+
if err != nil {
652+
return nil, errors.Wrapf(err, "failed to get instance info %v", info)
653+
}
654+
655+
if *info.State == models.InstanceInfoStatePaused {
656+
s.logger.Debug("Instance is in Paused state, force shutdown in progress")
657+
err = s.jailer.Stop(true)
658+
if err != nil {
659+
return nil, errors.Wrap(err, "failed to stop VM in paused State")
660+
}
661+
return &empty.Empty{}, nil
662+
}
663+
649664
err = s.waitVMReady()
650665
if err != nil {
651666
return nil, err

runtime/service_integ_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1537,6 +1537,7 @@ func TestStopVM_Isolated(t *testing.T) {
15371537
stopFunc func(ctx context.Context, fcClient fccontrol.FirecrackerService, req proto.CreateVMRequest)
15381538
withStopVM bool
15391539
}{
1540+
15401541
{
15411542
name: "Successful",
15421543
withStopVM: true,
@@ -1626,6 +1627,22 @@ func TestStopVM_Isolated(t *testing.T) {
16261627
}
16271628
},
16281629
},
1630+
1631+
// Test that StopVM returns success if the VM is in paused state, instead of hanging forever.
1632+
// VM is force shutdown in this case, so we expect no Error or hang.
1633+
{
1634+
name: "PauseStop",
1635+
withStopVM: true,
1636+
1637+
createVMRequest: proto.CreateVMRequest{},
1638+
stopFunc: func(ctx context.Context, fcClient fccontrol.FirecrackerService, req proto.CreateVMRequest) {
1639+
_, err = fcClient.PauseVM(ctx, &proto.PauseVMRequest{VMID: req.VMID})
1640+
require.Equal(status.Code(err), codes.OK)
1641+
1642+
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: req.VMID})
1643+
require.NoError(err)
1644+
},
1645+
},
16291646
}
16301647

16311648
fcClient, err := newFCControlClient(containerdSockPath)

0 commit comments

Comments
 (0)