Skip to content

nfd-worker: Watch features.d changes #2156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions pkg/nfd-worker/nfd-worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ type nfdWorker struct {
k8sClient k8sclient.Interface
nfdClient nfdclient.Interface
stop chan struct{} // channel for signaling stop
sourceEvent chan string // channel for events from soures
Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a spelling error in the comment. 'soures' should be 'sources'.

Suggested change
sourceEvent chan string // channel for events from soures
sourceEvent chan string // channel for events from sources

Copilot uses AI. Check for mistakes.

featureSources []source.FeatureSource
labelSources []source.LabelSource
ownerReference []metav1.OwnerReference
Expand Down Expand Up @@ -248,6 +249,36 @@ func (w *nfdWorker) runFeatureDiscovery() error {
return nil
}

// Run feature discovery.
func (w *nfdWorker) runFeatureDiscoveryBySourceName(source string) error {
discoveryStart := time.Now()
for _, s := range w.featureSources {
if s.Name() == source {
currentSourceStart := time.Now()
if err := s.Discover(); err != nil {
klog.ErrorS(err, "feature discovery failed", "source", s.Name())
}
klog.V(3).InfoS("feature discovery completed", "featureSource", s.Name(), "duration", time.Since(currentSourceStart))
}
}

discoveryDuration := time.Since(discoveryStart)
klog.V(2).InfoS("feature discovery of all sources completed", "duration", discoveryDuration)
featureDiscoveryDuration.WithLabelValues(utils.NodeName()).Observe(discoveryDuration.Seconds())
if w.config.Core.SleepInterval.Duration > 0 && discoveryDuration > w.config.Core.SleepInterval.Duration/2 {
klog.InfoS("feature discovery sources took over half of sleep interval ", "duration", discoveryDuration, "sleepInterval", w.config.Core.SleepInterval.Duration)
}
// Get the set of feature labels.
labels := createFeatureLabels(w.labelSources, w.config.Core.LabelWhiteList.Regexp)

// Update the node with the feature labels.
if !w.config.Core.NoPublish {
return w.advertiseFeatures(labels)
}

return nil
}

// Set owner ref
func (w *nfdWorker) setOwnerReference() error {
ownerReference := []metav1.OwnerReference{}
Expand Down Expand Up @@ -304,6 +335,12 @@ func (w *nfdWorker) Run() error {
labelTrigger.Reset(w.config.Core.SleepInterval.Duration)
defer labelTrigger.Stop()

w.sourceEvent = make(chan string)
eventSources := source.GetAllEventSources()
for _, s := range eventSources {
s.SetNotifyChannel(w.sourceEvent)
Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error returned by SetNotifyChannel() is not being handled. If setting up the notification channel fails for any source, it could lead to silent failures in event monitoring.

Suggested change
s.SetNotifyChannel(w.sourceEvent)
if err := s.SetNotifyChannel(w.sourceEvent); err != nil {
klog.ErrorS(err, "failed to set notify channel for event source", "source", s.Name())
return fmt.Errorf("failed to set notify channel for event source %s: %w", s.Name(), err)
}

Copilot uses AI. Check for mistakes.

}

httpMux := http.NewServeMux()

// Register to metrics server
Expand Down Expand Up @@ -341,6 +378,12 @@ func (w *nfdWorker) Run() error {
return err
}

case sourceName := <-w.sourceEvent:
err = w.runFeatureDiscoveryBySourceName(sourceName)
if err != nil {
return err
}

case <-w.stop:
klog.InfoS("shutting down nfd-worker")
return nil
Expand Down
52 changes: 49 additions & 3 deletions source/local/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"k8s.io/klog/v2"

"github.com/fsnotify/fsnotify"
nfdv1alpha1 "sigs.k8s.io/node-feature-discovery/api/nfd/v1alpha1"
"sigs.k8s.io/node-feature-discovery/pkg/utils"
"sigs.k8s.io/node-feature-discovery/source"
Expand Down Expand Up @@ -65,10 +66,11 @@ var (
featureFilesDir = "/etc/kubernetes/node-feature-discovery/features.d/"
)

// localSource implements the FeatureSource and LabelSource interfaces.
// localSource implements the FeatureSource, LabelSource, EventSource interfaces.
type localSource struct {
features *nfdv1alpha1.Features
config *Config
features *nfdv1alpha1.Features
config *Config
fsWatcher *fsnotify.Watcher
}

type Config struct {
Expand All @@ -87,6 +89,7 @@ var (
_ source.FeatureSource = &src
_ source.LabelSource = &src
_ source.ConfigurableSource = &src
_ source.EventSource = &src
)

// Name method of the LabelSource interface
Expand Down Expand Up @@ -318,6 +321,49 @@ func getFileContent(fileName string) ([][]byte, error) {
return lines, nil
}

func (s *localSource) runNotifier(ch chan string) {
for {
select {
Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The runNotifier goroutine runs indefinitely without a way to stop it. Consider adding a context or stop channel to allow graceful shutdown and prevent goroutine leaks.

Suggested change
select {
func (s *localSource) runNotifier(ctx context.Context, ch chan struct{}) {
for {
select {
case <-ctx.Done():
return

Copilot uses AI. Check for mistakes.

case event := <-s.fsWatcher.Events:
opAny := fsnotify.Create | fsnotify.Write | fsnotify.Remove | fsnotify.Rename | fsnotify.Chmod
if event.Op&opAny != 0 {
klog.V(2).InfoS("fsnotify event", "eventName", event.Name, "eventOp", event.Op)
ch <- s.Name()
}
case err := <-s.fsWatcher.Errors:
klog.ErrorS(err, "failed to to watch features.d changes")
Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling error: duplicated 'to' - should be 'failed to watch features.d changes'

Suggested change
klog.ErrorS(err, "failed to to watch features.d changes")
klog.ErrorS(err, "failed to watch features.d changes")

Copilot uses AI. Check for mistakes.

Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a duplicate 'to' in the error message. It should be "failed to watch features.d changes".

Suggested change
klog.ErrorS(err, "failed to to watch features.d changes")
klog.ErrorS(err, "failed to watch features.d changes")

Copilot uses AI. Check for mistakes.

}
time.Sleep(1 * time.Second)
Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The hardcoded 1-second sleep in the event loop may cause unnecessary delays in event processing. Consider removing this sleep or making it configurable, as fsnotify events should be processed immediately.

Suggested change
time.Sleep(1 * time.Second)

Copilot uses AI. Check for mistakes.

}
}

// SetNotifyChannel method of the EventSource Interface
func (s *localSource) SetNotifyChannel(ch chan string) error {
info, err := os.Stat(featureFilesDir)
if err != nil {
if !os.IsNotExist(err) {
return err
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it does not exist we probably want to exit, too (return nil)?

}

if info != nil && info.IsDir() {
watcher, err := fsnotify.NewWatcher()
if err != nil {
return err
}

err = watcher.Add(featureFilesDir)
if err != nil {
return fmt.Errorf("unable to access %v: %w", featureFilesDir, err)
}
s.fsWatcher = watcher
}

go s.runNotifier(ch)

Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Starting the goroutine unconditionally even when fsWatcher is nil (when directory doesn't exist) will cause the goroutine to block indefinitely on nil channel reads, leading to a goroutine leak.

Suggested change
go s.runNotifier(ch)
}

Copilot uses AI. Check for mistakes.

Copy link
Preview

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goroutine is started unconditionally, even when no filesystem watcher is created (when the directory doesn't exist). This will create a goroutine that only handles errors from a nil watcher, which could lead to unexpected behavior.

Suggested change
go s.runNotifier(ch)
}

Copilot uses AI. Check for mistakes.

return nil
}

func init() {
source.Register(&src)
}
19 changes: 19 additions & 0 deletions source/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ type SupplementalSource interface {
DisableByDefault() bool
}

// EventSource is an interface for a source that can send events
type EventSource interface {
Source

// SetNotifyChannel sets the channel
SetNotifyChannel(chan string) error
}

// FeatureLabelValue represents the value of one feature label
type FeatureLabelValue interface{}

Expand Down Expand Up @@ -155,6 +163,17 @@ func GetAllConfigurableSources() map[string]ConfigurableSource {
return all
}

// GetAllEventSources returns all registered event sources
func GetAllEventSources() map[string]EventSource {
all := make(map[string]EventSource)
for k, v := range sources {
if s, ok := v.(EventSource); ok {
all[k] = s
}
}
return all
}

// GetAllFeatures returns a combined set of all features from all feature
// sources.
func GetAllFeatures() *nfdv1alpha1.Features {
Expand Down