Skip to content

Commit 9ccbb33

Browse files
committed
fix: address PR review comments
- Rename newHealth to health (Go naming conventions)
- Remove dead kubeletSock check (wrong directory being watched)
- Handle closed fsnotify channels with fallback to polling
- Update comment to reflect actual behavior

Signed-off-by: Simon Davies <[email protected]>
1 parent 6988626 commit 9ccbb33

File tree

1 file changed

+16
-12
lines changed

1 file changed

+16
-12
lines changed

device-plugin/main.go

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -190,19 +190,19 @@ func (p *HyperlightDevicePlugin) ListAndWatch(req *pluginapi.Empty, srv pluginap
190190
case <-p.stopCh:
191191
return nil
192192
case <-ticker.C:
193-
newHealth := pluginapi.Healthy
193+
health := pluginapi.Healthy
194194
if _, err := os.Stat(p.devicePath); err != nil {
195-
newHealth = pluginapi.Unhealthy
195+
health = pluginapi.Unhealthy
196196
klog.Warningf("Device %s not found, marking all devices unhealthy", p.devicePath)
197197
}
198198

199199
// Check if health changed (compare against first device as representative)
200-
if p.devices[0].Health != newHealth {
200+
if p.devices[0].Health != health {
201201
// Update ALL devices - they all share the same underlying hypervisor device
202202
for i := range p.devices {
203-
p.devices[i].Health = newHealth
203+
p.devices[i].Health = health
204204
}
205-
klog.Infof("Device health changed to %s for all %d devices", newHealth, len(p.devices))
205+
klog.Infof("Device health changed to %s for all %d devices", health, len(p.devices))
206206
if err := srv.Send(&pluginapi.ListAndWatchResponse{Devices: p.devices}); err != nil {
207207
return err
208208
}
@@ -317,7 +317,6 @@ func (p *HyperlightDevicePlugin) Stop() {
317317
}
318318

319319
// newFSWatcher creates a filesystem watcher for kubelet restart detection.
320-
// This is the industry-standard approach used by NVIDIA, Intel, and other device plugins.
321320
func newFSWatcher(files ...string) (*fsnotify.Watcher, error) {
322321
watcher, err := fsnotify.NewWatcher()
323322
if err != nil {
@@ -336,7 +335,7 @@ func newFSWatcher(files ...string) (*fsnotify.Watcher, error) {
336335

337336
// watchKubeletRestart monitors for kubelet restarts using fsnotify.
338337
// When kubelet restarts, it deletes all sockets in /var/lib/kubelet/device-plugins/.
339-
// This function blocks until it detects a relevant filesystem event.
338+
// This function blocks until it detects our plugin socket being deleted.
340339
func (p *HyperlightDevicePlugin) watchKubeletRestart() {
341340
klog.Info("Watching for kubelet restart using fsnotify...")
342341

@@ -352,17 +351,22 @@ func (p *HyperlightDevicePlugin) watchKubeletRestart() {
352351
select {
353352
case <-p.stopCh:
354353
return
355-
case event := <-watcher.Events:
354+
case event, ok := <-watcher.Events:
355+
if !ok {
356+
klog.Warning("fsnotify events channel closed, falling back to polling")
357+
p.watchKubeletRestartPolling()
358+
return
359+
}
356360
if event.Name == serverSock && (event.Op&fsnotify.Remove) == fsnotify.Remove {
357361
klog.Info("Plugin socket deleted - kubelet may have restarted")
358362
return
359363
}
360-
// Also watch for kubelet socket recreation (indicates kubelet restart complete)
361-
if event.Name == kubeletSock && (event.Op&fsnotify.Create) == fsnotify.Create {
362-
klog.Info("Kubelet socket recreated - kubelet restart detected")
364+
case err, ok := <-watcher.Errors:
365+
if !ok {
366+
klog.Warning("fsnotify errors channel closed, falling back to polling")
367+
p.watchKubeletRestartPolling()
363368
return
364369
}
365-
case err := <-watcher.Errors:
366370
klog.Warningf("fsnotify error: %v", err)
367371
}
368372
}

0 commit comments

Comments
 (0)