@@ -45,6 +45,7 @@ const (
4545 pciBasePath = "/sys/bus/pci/devices"
4646 connectionTimeout = 120 * time .Second // Google gRPC default timeout
4747 PCIResourcePrefix = "PCI_RESOURCE"
48+ tickerTimeout = 30 * time .Second
4849)
4950
5051type PCIDevice struct {
@@ -292,7 +293,6 @@ func (dp *PCIDevicePlugin) Allocate(_ context.Context, r *pluginapi.AllocateRequ
292293}
293294
294295func (dp * PCIDevicePlugin ) healthCheck () error {
295- logger := log .DefaultLogger ()
296296 monitoredDevices := make (map [string ]string )
297297 watcher , err := fsnotify .NewWatcher ()
298298 if err != nil {
@@ -348,31 +348,37 @@ func (dp *PCIDevicePlugin) healthCheck() error {
348348 return fmt .Errorf ("failed to watch device-plugin socket: %v" , err )
349349 }
350350
351+ return dp .performCheck (monitoredDevices , watcher )
352+ }
353+
354+ func (dp * PCIDevicePlugin ) performCheck (monitoredDevices map [string ]string , watcher * fsnotify.Watcher ) error {
351355 for {
352356 select {
353357 case <- dp .stop :
354358 return nil
359+ case <- dp .done :
360+ return nil
355361 case err := <- watcher .Errors :
356- logger . Reason ( err ). Errorf ("error watching devices and device plugin directory" )
362+ logrus . Errorf ("error watching devices and device plugin directory: %v" , err )
357363 case event := <- watcher .Events :
358- logger . V ( 4 ) .Infof ("health Event: %v" , event )
364+ logrus .Infof ("health Event: %v" , event )
359365 if monDevID , exist := monitoredDevices [event .Name ]; exist {
360366 // Health in this case is if the device path actually exists
361367 if event .Op == fsnotify .Create {
362- logger .Infof ("monitored device %s appeared" , dp .resourceName )
368+ logrus .Infof ("monitored device %s appeared" , dp .resourceName )
363369 dp .health <- deviceHealth {
364370 DevID : monDevID ,
365371 Health : pluginapi .Healthy ,
366372 }
367373 } else if (event .Op == fsnotify .Remove ) || (event .Op == fsnotify .Rename ) {
368- logger .Infof ("monitored device %s disappeared" , dp .resourceName )
374+ logrus .Infof ("monitored device %s disappeared" , dp .resourceName )
369375 dp .health <- deviceHealth {
370376 DevID : monDevID ,
371377 Health : pluginapi .Unhealthy ,
372378 }
373379 }
374380 } else if event .Name == dp .socketPath && event .Op == fsnotify .Remove {
375- logger .Infof ("device socket file for device %s was removed, kubelet probably restarted." , dp .resourceName )
381+ logrus .Infof ("device socket file for device %s was removed, kubelet probably restarted." , dp .resourceName )
376382 return nil
377383 }
378384 }
@@ -389,14 +395,12 @@ func (dp *PCIDevicePlugin) GetDeviceName() string {
389395
390396// stopDevicePlugin stops the gRPC server
391397func (dp * PCIDevicePlugin ) stopDevicePlugin () error {
392- defer func () {
393- if ! IsChanClosed (dp .done ) {
394- close (dp .done )
395- }
396- }()
398+ if ! IsChanClosed (dp .done ) {
399+ close (dp .done )
400+ }
397401
398402 // Give the device plugin up to tickerTimeout to properly deregister
399- ticker := time .NewTicker (1 * time . Second )
403+ ticker := time .NewTicker (tickerTimeout )
400404 defer ticker .Stop ()
401405 select {
402406 case <- dp .deregistered :
0 commit comments