Skip to content

Commit cc0efea

Browse files
author
Rafael Garcia
committed
markattached bug
1 parent 361998f commit cc0efea

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

lib/devices/manager.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -800,5 +800,3 @@ func (m *manager) findByPCIAddress(pciAddress string) (*Device, error) {
800800

801801
return nil, ErrNotFound
802802
}
803-
804-

lib/instances/create.go

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,29 @@ func (m *manager) createInstance(
202202
kernelVer := m.systemManager.GetDefaultKernelVersion()
203203

204204
// 9. Validate, resolve, and auto-bind devices (GPU passthrough)
205+
// Track devices we've marked as attached for cleanup on error.
206+
// The cleanup closure captures this slice by reference, so it will see
207+
// whatever devices have been attached when cleanup runs.
208+
var attachedDeviceIDs []string
205209
var resolvedDeviceIDs []string
210+
211+
// Setup cleanup stack early so device attachment errors trigger cleanup
212+
cu := cleanup.Make(func() {
213+
log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id)
214+
m.deleteInstanceData(id)
215+
})
216+
defer cu.Clean()
217+
218+
// Add device detachment cleanup - closure captures attachedDeviceIDs by reference
219+
if m.deviceManager != nil {
220+
cu.Add(func() {
221+
for _, deviceID := range attachedDeviceIDs {
222+
log.DebugContext(ctx, "detaching device on cleanup", "instance_id", id, "device", deviceID)
223+
m.deviceManager.MarkDetached(ctx, deviceID)
224+
}
225+
})
226+
}
227+
206228
if len(req.Devices) > 0 && m.deviceManager != nil {
207229
for _, deviceRef := range req.Devices {
208230
device, err := m.deviceManager.GetDevice(ctx, deviceRef)
@@ -222,6 +244,12 @@ func (m *manager) createInstance(
222244
return nil, fmt.Errorf("bind device %s to VFIO: %w", deviceRef, err)
223245
}
224246
}
247+
// Mark device as attached to this instance
248+
if err := m.deviceManager.MarkAttached(ctx, device.Id, id); err != nil {
249+
log.ErrorContext(ctx, "failed to mark device as attached", "device", deviceRef, "error", err)
250+
return nil, fmt.Errorf("mark device %s as attached: %w", deviceRef, err)
251+
}
252+
attachedDeviceIDs = append(attachedDeviceIDs, device.Id)
225253
resolvedDeviceIDs = append(resolvedDeviceIDs, device.Id)
226254
}
227255
log.DebugContext(ctx, "validated devices for passthrough", "id", id, "devices", resolvedDeviceIDs)
@@ -250,28 +278,21 @@ func (m *manager) createInstance(
250278
Devices: resolvedDeviceIDs,
251279
}
252280

253-
// Setup cleanup stack for automatic rollback on errors
254-
cu := cleanup.Make(func() {
255-
log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id)
256-
m.deleteInstanceData(id)
257-
})
258-
defer cu.Clean()
259-
260-
// 8. Ensure directories
281+
// 11. Ensure directories
261282
log.DebugContext(ctx, "creating instance directories", "instance_id", id)
262283
if err := m.ensureDirectories(id); err != nil {
263284
log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err)
264285
return nil, fmt.Errorf("ensure directories: %w", err)
265286
}
266287

267-
// 9. Create overlay disk with specified size
288+
// 12. Create overlay disk with specified size
268289
log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize)
269290
if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil {
270291
log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err)
271292
return nil, fmt.Errorf("create overlay disk: %w", err)
272293
}
273294

274-
// 14. Allocate network (if network enabled)
295+
// 13. Allocate network (if network enabled)
275296
var netConfig *network.NetworkConfig
276297
if networkName != "" {
277298
log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName)
@@ -296,7 +317,7 @@ func (m *manager) createInstance(
296317
})
297318
}
298319

299-
// 15. Validate and attach volumes
320+
// 14. Validate and attach volumes
300321
if len(req.Volumes) > 0 {
301322
log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes))
302323
for _, volAttach := range req.Volumes {
@@ -336,30 +357,30 @@ func (m *manager) createInstance(
336357
stored.Volumes = req.Volumes
337358
}
338359

339-
// 16. Create config disk (needs Instance for buildVMConfig)
360+
// 15. Create config disk (needs Instance for buildVMConfig)
340361
inst := &Instance{StoredMetadata: *stored}
341362
log.DebugContext(ctx, "creating config disk", "instance_id", id)
342363
if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil {
343364
log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err)
344365
return nil, fmt.Errorf("create config disk: %w", err)
345366
}
346367

347-
// 12. Save metadata
368+
// 16. Save metadata
348369
log.DebugContext(ctx, "saving instance metadata", "instance_id", id)
349370
meta := &metadata{StoredMetadata: *stored}
350371
if err := m.saveMetadata(meta); err != nil {
351372
log.ErrorContext(ctx, "failed to save metadata", "instance_id", id, "error", err)
352373
return nil, fmt.Errorf("save metadata: %w", err)
353374
}
354375

355-
// 13. Start VMM and boot VM
376+
// 17. Start VMM and boot VM
356377
log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id)
357378
if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil {
358379
log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err)
359380
return nil, err
360381
}
361382

362-
// 14. Update timestamp after VM is running
383+
// 18. Update timestamp after VM is running
363384
now := time.Now()
364385
stored.StartedAt = &now
365386

0 commit comments

Comments
 (0)