@@ -647,26 +647,10 @@ func (s *service) StopVM(requestCtx context.Context, request *proto.StopVMReques
647
647
timeout = time .Duration (request .TimeoutSeconds ) * time .Second
648
648
}
649
649
650
- info , err := s .machine .DescribeInstanceInfo (requestCtx )
651
- if err != nil {
652
- return nil , errors .Wrapf (err , "failed to get instance info %v" , info )
653
- }
654
-
655
- if * info .State == models .InstanceInfoStatePaused {
656
- s .logger .Debug ("Instance is in Paused state, force shutdown in progress" )
657
- err = s .jailer .Stop (true )
658
- if err != nil {
659
- return nil , errors .Wrap (err , "failed to stop VM in paused State" )
660
- }
661
- return & empty.Empty {}, nil
662
- }
663
-
664
- err = s .waitVMReady ()
665
- if err != nil {
666
- return nil , err
667
- }
650
+ ctx , cancel := context .WithTimeout (requestCtx , timeout )
651
+ defer cancel ()
668
652
669
- if err = s .shutdown ( requestCtx , timeout , & taskAPI. ShutdownRequest { Now : true } ); err != nil {
653
+ if err = s .terminate ( ctx ); err != nil {
670
654
return nil , err
671
655
}
672
656
return & empty.Empty {}, nil
@@ -1563,94 +1547,80 @@ func (s *service) Shutdown(requestCtx context.Context, req *taskAPI.ShutdownRequ
1563
1547
return & ptypes.Empty {}, nil
1564
1548
}
1565
1549
1566
- if err := s .shutdown (requestCtx , defaultShutdownTimeout , req ); err != nil {
1550
+ ctx , cancel := context .WithTimeout (requestCtx , defaultShutdownTimeout )
1551
+ defer cancel ()
1552
+
1553
+ if err := s .terminate (ctx ); err != nil {
1567
1554
return & ptypes.Empty {}, err
1568
1555
}
1569
1556
1570
1557
return & ptypes.Empty {}, nil
1571
1558
}
1572
1559
1573
- func (s * service ) shutdown (
1574
- requestCtx context.Context ,
1575
- timeout time.Duration ,
1576
- req * taskAPI.ShutdownRequest ,
1577
- ) error {
1578
- s .logger .Info ("stopping the VM" )
1560
+ func (s * service ) isPaused (ctx context.Context ) (bool , error ) {
1561
+ info , err := s .machine .DescribeInstanceInfo (ctx )
1562
+ if err != nil {
1563
+ return false , errors .Wrapf (err , "failed to get instance info %v" , info )
1564
+ }
1565
+ return * info .State == models .InstanceInfoStatePaused , nil
1566
+ }
1579
1567
1580
- go func () {
1581
- s .shutdownLoop (requestCtx , timeout , req )
1582
- }()
1568
+ func (s * service ) forceTerminate (ctx context.Context ) error {
1569
+ s .logger .Errorf ("forcefully terminate VM %s" , s .vmID )
1583
1570
1584
- var result * multierror.Error
1585
- if err := s .machine .Wait (context .Background ()); err != nil {
1586
- result = multierror .Append (result , err )
1587
- }
1588
- if err := s .cleanup (); err != nil {
1589
- result = multierror .Append (result , err )
1571
+ err := s .jailer .Stop (true )
1572
+ if err != nil {
1573
+ s .logger .WithError (err ).Error ("failed to stop" )
1590
1574
}
1591
1575
1592
- if err := result .ErrorOrNil (); err != nil {
1593
- return status .Error (codes .Internal , fmt .Sprintf ("the VMM was killed forcibly: %v" , err ))
1576
+ err = s .cleanup ()
1577
+ if err != nil {
1578
+ s .logger .WithError (err ).Error ("failed to cleanup" )
1594
1579
}
1595
- return nil
1580
+
1581
+ return status .Errorf (codes .Internal , "forcefully terminated VM %s" , s .vmID )
1596
1582
}
1597
1583
1598
- // shutdownLoop sends multiple different shutdown requests to stop the VMM.
1599
- // 1) send a request to the in-VM agent, which is presumed to cause the VM to begin a reboot.
1600
- // 2) stop the VM through jailer#Stop(). The signal should be visible from the VMM (e.g. SIGTERM)
1601
- // 3) stop the VM through cancelling the associated context. The signal would not be visible from the VMM (e.g. SIGKILL)
1602
- func (s * service ) shutdownLoop (
1603
- requestCtx context.Context ,
1604
- timeout time.Duration ,
1605
- req * taskAPI.ShutdownRequest ,
1606
- ) {
1607
- actions := []struct {
1608
- name string
1609
- shutdown func () error
1610
- timeout time.Duration
1611
- }{
1612
- {
1613
- name : "send a request to the in-VM agent" ,
1614
- shutdown : func () error {
1615
- _ , err := s .agentClient .Shutdown (requestCtx , req )
1616
- if err != nil {
1617
- return err
1618
- }
1619
- return nil
1620
- },
1621
- timeout : timeout ,
1622
- },
1623
- {
1624
- name : "stop the jailer by SIGTERM" ,
1625
- shutdown : func () error {
1626
- return s .jailer .Stop (false )
1627
- },
1628
- timeout : jailerStopTimeout ,
1629
- },
1630
- {
1631
- name : "stop the jailer by SIGKILL" ,
1632
- shutdown : func () error {
1633
- return s .jailer .Stop (true )
1634
- },
1635
- timeout : jailerStopTimeout ,
1636
- },
1584
+ func (s * service ) terminate (ctx context.Context ) (retErr error ) {
1585
+ var success bool
1586
+ defer func () {
1587
+ if ! success {
1588
+ retErr = s .forceTerminate (ctx )
1589
+ }
1590
+ }()
1591
+
1592
+ err := s .waitVMReady ()
1593
+ if err != nil {
1594
+ s .logger .WithError (err ).Error ("failed to wait VM" )
1595
+ return
1637
1596
}
1638
1597
1639
- for _ , action := range actions {
1640
- pid , err := s . machine . PID ()
1641
- if pid == 0 && err != nil {
1642
- break // we have nothing to kill
1643
- }
1598
+ paused , err := s . isPaused ( ctx )
1599
+ if err != nil {
1600
+ s . logger . WithError ( err ). Error ( "failed to check VM" )
1601
+ return
1602
+ }
1644
1603
1645
- s .logger .Debug (action .name )
1646
- err = action .shutdown ()
1647
- if err != nil {
1648
- // if sending an request doesn't succeed, don't wait and carry on.
1649
- s .logger .WithError (err ).Errorf ("failed to %s" , action .name )
1650
- } else {
1651
- time .Sleep (action .timeout )
1652
- }
1604
+ if paused {
1605
+ s .logger .Error ("VM is paused and cannot take requests" )
1606
+ return
1653
1607
}
1608
+
1609
+ s .logger .Info ("gracefully shutdown VM" )
1610
+ _ , err = s .agentClient .Shutdown (ctx , & taskAPI.ShutdownRequest {ID : s .vmID , Now : true })
1611
+ if err != nil {
1612
+ s .logger .WithError (err ).Error ("failed to call in-VM agent" )
1613
+ return
1614
+ }
1615
+
1616
+ err = s .machine .Wait (ctx )
1617
+ if err != nil {
1618
+ s .logger .WithError (err ).Error ("failed to wait VM" )
1619
+ return
1620
+ }
1621
+
1622
+ success = true
1623
+ return
1654
1624
}
1655
1625
1656
1626
func (s * service ) Stats (requestCtx context.Context , req * taskAPI.StatsRequest ) (* taskAPI.StatsResponse , error ) {
0 commit comments