@@ -121,10 +121,7 @@ type service struct {
121
121
config * config.Config
122
122
123
123
// vmReady is closed once CreateVM has been successfully called
124
- vmReady chan struct {}
125
- // vmExitErr is set with the error returned by machine.Wait(). It should
126
- // only be read after shimCtx.Done() is closed
127
- vmExitErr error
124
+ vmReady chan struct {}
128
125
vmStartOnce sync.Once
129
126
agentClient taskAPI.TaskService
130
127
eventBridgeClient eventbridge.Getter
@@ -134,6 +131,9 @@ type service struct {
134
131
driveMountStubs []MountableStubDrive
135
132
exitAfterAllTasksDeleted bool // exit the VM and shim when all tasks are deleted
136
133
134
+ cleanupErr error
135
+ cleanupOnce sync.Once
136
+
137
137
machine * firecracker.Machine
138
138
machineConfig * firecracker.Config
139
139
vsockIOPortCount uint32
@@ -583,7 +583,6 @@ func (s *service) StopVM(requestCtx context.Context, request *proto.StopVMReques
583
583
timeout = time .Duration (request .TimeoutSeconds ) * time .Second
584
584
}
585
585
586
- defer s .shimCancel ()
587
586
err = s .waitVMReady ()
588
587
if err != nil {
589
588
return nil , err
@@ -1178,12 +1177,9 @@ func (s *service) shutdown(
1178
1177
if err := s .machine .Wait (context .Background ()); err != nil {
1179
1178
result = multierror .Append (result , err )
1180
1179
}
1181
- if err := s .jailer . Close (); err != nil {
1180
+ if err := s .cleanup (); err != nil {
1182
1181
result = multierror .Append (result , err )
1183
1182
}
1184
- if result == nil {
1185
- return nil
1186
- }
1187
1183
1188
1184
if err := result .ErrorOrNil (); err != nil {
1189
1185
return status .Error (codes .Internal , fmt .Sprintf ("the VMM was killed forcibly: %v" , err ))
@@ -1223,13 +1219,6 @@ func (s *service) shutdownLoop(
1223
1219
},
1224
1220
timeout : jailerStopTimeout ,
1225
1221
},
1226
- {
1227
- name : "cancel the context" ,
1228
- shutdown : func () error {
1229
- s .shimCancel ()
1230
- return nil
1231
- },
1232
- },
1233
1222
}
1234
1223
1235
1224
for _ , action := range actions {
@@ -1305,26 +1294,38 @@ func (s *service) Cleanup(requestCtx context.Context) (*taskAPI.DeleteResponse,
1305
1294
}, nil
1306
1295
}
1307
1296
1308
- func (s * service ) monitorVMExit () {
1309
- defer func () {
1297
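+ // cleanup closes the jailer, publishes the VM stop event and cancels the shim context; it runs only once and later calls return the same error.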
1298
+ func (s * service ) cleanup () error {
1299
+ s .cleanupOnce .Do (func () {
1300
+ var result * multierror.Error
1310
1301
// we ignore the error here because cleanup will only succeed if the jailing
1311
1302
// process was killed via SIGKILL
1312
1303
if err := s .jailer .Close (); err != nil {
1304
+ result = multierror .Append (result , err )
1313
1305
s .logger .WithError (err ).Error ("failed to close jailer" )
1314
1306
}
1315
1307
1308
+ if err := s .publishVMStop (); err != nil {
1309
+ result = multierror .Append (result , err )
1310
+ s .logger .WithError (err ).Error ("failed to publish stop VM event" )
1311
+ }
1312
+
1316
1313
// once the VM shuts down, the shim should too
1317
1314
s .shimCancel ()
1318
- }()
1319
1315
1316
+ s .cleanupErr = result .ErrorOrNil ()
1317
+ })
1318
+ return s .cleanupErr
1319
+ }
1320
+
1321
+ // monitorVMExit watches the VM and cleans up resources when it terminates.
1322
+ func (s * service ) monitorVMExit () {
1320
1323
// Block until the VM exits
1321
- s .vmExitErr = s .machine .Wait (s .shimCtx )
1322
- if s .vmExitErr != nil && s .vmExitErr != context .Canceled {
1323
- s .logger .WithError (s .vmExitErr ).Error ("error returned from VM wait" )
1324
+ if err := s .machine .Wait (s .shimCtx ); err != nil && err != context .Canceled {
1325
+ s .logger .WithError (err ).Error ("error returned from VM wait" )
1324
1326
}
1325
1327
1326
- publishErr := s .publishVMStop ()
1327
- if publishErr != nil {
1328
- s .logger .WithError (publishErr ).Error ("failed to publish stop VM event" )
1328
+ if err := s .cleanup (); err != nil {
1329
+ s .logger .WithError (err ).Error ("failed to clean up the VM" )
1329
1330
}
1330
1331
}
0 commit comments