@@ -202,7 +202,29 @@ func (m *manager) createInstance(
202202 kernelVer := m .systemManager .GetDefaultKernelVersion ()
203203
204204 // 9. Validate, resolve, and auto-bind devices (GPU passthrough)
205+ // Track devices we've marked as attached for cleanup on error.
206+ // The cleanup closure captures this slice by reference, so it will see
207+ // whatever devices have been attached when cleanup runs.
208+ var attachedDeviceIDs []string
205209 var resolvedDeviceIDs []string
210+
211+ // Setup cleanup stack early so device attachment errors trigger cleanup
212+ cu := cleanup .Make (func () {
213+ log .DebugContext (ctx , "cleaning up instance on error" , "instance_id" , id )
214+ m .deleteInstanceData (id )
215+ })
216+ defer cu .Clean ()
217+
218+ // Add device detachment cleanup - closure captures attachedDeviceIDs by reference
219+ if m .deviceManager != nil {
220+ cu .Add (func () {
221+ for _ , deviceID := range attachedDeviceIDs {
222+ log .DebugContext (ctx , "detaching device on cleanup" , "instance_id" , id , "device" , deviceID )
223+ m .deviceManager .MarkDetached (ctx , deviceID )
224+ }
225+ })
226+ }
227+
206228 if len (req .Devices ) > 0 && m .deviceManager != nil {
207229 for _ , deviceRef := range req .Devices {
208230 device , err := m .deviceManager .GetDevice (ctx , deviceRef )
@@ -222,6 +244,12 @@ func (m *manager) createInstance(
222244 return nil , fmt .Errorf ("bind device %s to VFIO: %w" , deviceRef , err )
223245 }
224246 }
247+ // Mark device as attached to this instance
248+ if err := m .deviceManager .MarkAttached (ctx , device .Id , id ); err != nil {
249+ log .ErrorContext (ctx , "failed to mark device as attached" , "device" , deviceRef , "error" , err )
250+ return nil , fmt .Errorf ("mark device %s as attached: %w" , deviceRef , err )
251+ }
252+ attachedDeviceIDs = append (attachedDeviceIDs , device .Id )
225253 resolvedDeviceIDs = append (resolvedDeviceIDs , device .Id )
226254 }
227255 log .DebugContext (ctx , "validated devices for passthrough" , "id" , id , "devices" , resolvedDeviceIDs )
@@ -250,28 +278,21 @@ func (m *manager) createInstance(
250278 Devices : resolvedDeviceIDs ,
251279 }
252280
253- // Setup cleanup stack for automatic rollback on errors
254- cu := cleanup .Make (func () {
255- log .DebugContext (ctx , "cleaning up instance on error" , "instance_id" , id )
256- m .deleteInstanceData (id )
257- })
258- defer cu .Clean ()
259-
260- // 8. Ensure directories
281+ // 11. Ensure directories
261282 log .DebugContext (ctx , "creating instance directories" , "instance_id" , id )
262283 if err := m .ensureDirectories (id ); err != nil {
263284 log .ErrorContext (ctx , "failed to create directories" , "instance_id" , id , "error" , err )
264285 return nil , fmt .Errorf ("ensure directories: %w" , err )
265286 }
266287
267- // 9 . Create overlay disk with specified size
288+ // 12 . Create overlay disk with specified size
268289 log .DebugContext (ctx , "creating overlay disk" , "instance_id" , id , "size_bytes" , stored .OverlaySize )
269290 if err := m .createOverlayDisk (id , stored .OverlaySize ); err != nil {
270291 log .ErrorContext (ctx , "failed to create overlay disk" , "instance_id" , id , "error" , err )
271292 return nil , fmt .Errorf ("create overlay disk: %w" , err )
272293 }
273294
274- // 14 . Allocate network (if network enabled)
295+ // 13 . Allocate network (if network enabled)
275296 var netConfig * network.NetworkConfig
276297 if networkName != "" {
277298 log .DebugContext (ctx , "allocating network" , "instance_id" , id , "network" , networkName )
@@ -296,7 +317,7 @@ func (m *manager) createInstance(
296317 })
297318 }
298319
299- // 15 . Validate and attach volumes
320+ // 14 . Validate and attach volumes
300321 if len (req .Volumes ) > 0 {
301322 log .DebugContext (ctx , "validating volumes" , "instance_id" , id , "count" , len (req .Volumes ))
302323 for _ , volAttach := range req .Volumes {
@@ -336,30 +357,30 @@ func (m *manager) createInstance(
336357 stored .Volumes = req .Volumes
337358 }
338359
339- // 16 . Create config disk (needs Instance for buildVMConfig)
360+ // 15 . Create config disk (needs Instance for buildVMConfig)
340361 inst := & Instance {StoredMetadata : * stored }
341362 log .DebugContext (ctx , "creating config disk" , "instance_id" , id )
342363 if err := m .createConfigDisk (inst , imageInfo , netConfig ); err != nil {
343364 log .ErrorContext (ctx , "failed to create config disk" , "instance_id" , id , "error" , err )
344365 return nil , fmt .Errorf ("create config disk: %w" , err )
345366 }
346367
347- // 12 . Save metadata
368+ // 16 . Save metadata
348369 log .DebugContext (ctx , "saving instance metadata" , "instance_id" , id )
349370 meta := & metadata {StoredMetadata : * stored }
350371 if err := m .saveMetadata (meta ); err != nil {
351372 log .ErrorContext (ctx , "failed to save metadata" , "instance_id" , id , "error" , err )
352373 return nil , fmt .Errorf ("save metadata: %w" , err )
353374 }
354375
355- // 13 . Start VMM and boot VM
376+ // 17 . Start VMM and boot VM
356377 log .InfoContext (ctx , "starting VMM and booting VM" , "instance_id" , id )
357378 if err := m .startAndBootVM (ctx , stored , imageInfo , netConfig ); err != nil {
358379 log .ErrorContext (ctx , "failed to start and boot VM" , "instance_id" , id , "error" , err )
359380 return nil , err
360381 }
361382
362- // 14 . Update timestamp after VM is running
383+ // 18 . Update timestamp after VM is running
363384 now := time .Now ()
364385 stored .StartedAt = & now
365386
0 commit comments