@@ -42,6 +42,11 @@ type Manager interface {
4242
4343 // MarkDetached marks a device as detached from an instance
4444 MarkDetached (ctx context.Context , deviceID string ) error
45+
46+ // ReconcileDevices cleans up stale device state on startup.
47+ // It detects devices with AttachedTo referencing non-existent instances
48+ // and clears the orphaned attachment state.
49+ ReconcileDevices (ctx context.Context ) error
4550}
4651
4752type manager struct {
@@ -59,6 +64,9 @@ func NewManager(p *paths.Paths) Manager {
5964}
6065
6166func (m * manager ) ListDevices (ctx context.Context ) ([]Device , error ) {
67+ // RLock protects against concurrent directory modifications (CreateDevice/DeleteDevice)
68+ // during iteration. While individual file reads are atomic, directory iteration could
69+ // see inconsistent state if a device is being created or deleted concurrently.
6270 m .mu .RLock ()
6371 defer m .mu .RUnlock ()
6472
@@ -172,6 +180,8 @@ func (m *manager) CreateDevice(ctx context.Context, req CreateDeviceRequest) (*D
172180}
173181
174182func (m * manager ) GetDevice (ctx context.Context , idOrName string ) (* Device , error ) {
183+ // RLock protects against concurrent modifications while looking up by name,
184+ // which requires iterating the devices directory.
175185 m .mu .RLock ()
176186 defer m .mu .RUnlock ()
177187
@@ -343,6 +353,79 @@ func (m *manager) MarkDetached(ctx context.Context, deviceID string) error {
343353 return m .saveDevice (device )
344354}
345355
356+ // ReconcileDevices cleans up stale device state on startup.
357+ // It scans all registered devices and clears AttachedTo for any device
358+ // that references an instance that no longer exists.
359+ func (m * manager ) ReconcileDevices (ctx context.Context ) error {
360+ log := logger .FromContext (ctx )
361+ log .InfoContext (ctx , "reconciling device state" )
362+
363+ m .mu .Lock ()
364+ defer m .mu .Unlock ()
365+
366+ entries , err := os .ReadDir (m .paths .DevicesDir ())
367+ if err != nil {
368+ if os .IsNotExist (err ) {
369+ // No devices directory yet, nothing to reconcile
370+ return nil
371+ }
372+ return fmt .Errorf ("read devices dir: %w" , err )
373+ }
374+
375+ var reconciled int
376+ for _ , entry := range entries {
377+ if ! entry .IsDir () {
378+ continue
379+ }
380+
381+ device , err := m .loadDevice (entry .Name ())
382+ if err != nil {
383+ log .WarnContext (ctx , "failed to load device during reconciliation" ,
384+ "device_id" , entry .Name (),
385+ "error" , err ,
386+ )
387+ continue
388+ }
389+
390+ // Skip devices that aren't attached to anything
391+ if device .AttachedTo == nil {
392+ continue
393+ }
394+
395+ // Check if the referenced instance still exists
396+ instanceID := * device .AttachedTo
397+ instanceDir := m .paths .InstanceDir (instanceID )
398+ if _ , err := os .Stat (instanceDir ); os .IsNotExist (err ) {
399+ // Instance no longer exists - clear the orphaned attachment
400+ log .WarnContext (ctx , "clearing orphaned device attachment" ,
401+ "device_id" , device .Id ,
402+ "device_name" , device .Name ,
403+ "orphaned_instance_id" , instanceID ,
404+ )
405+
406+ device .AttachedTo = nil
407+ if err := m .saveDevice (device ); err != nil {
408+ log .ErrorContext (ctx , "failed to save device after clearing attachment" ,
409+ "device_id" , device .Id ,
410+ "error" , err ,
411+ )
412+ continue
413+ }
414+ reconciled ++
415+ }
416+ }
417+
418+ if reconciled > 0 {
419+ log .InfoContext (ctx , "device reconciliation complete" ,
420+ "orphaned_attachments_cleared" , reconciled ,
421+ )
422+ } else {
423+ log .InfoContext (ctx , "device reconciliation complete, no orphaned attachments found" )
424+ }
425+
426+ return nil
427+ }
428+
346429// Helper methods
347430
348431func (m * manager ) loadDevice (id string ) (* Device , error ) {
0 commit comments