Skip to content

Commit 44a5a5b

Browse files
committed
nfd-master: get node object only once when updating node
Prevent excess queries of node objects from the Kubernetes apiserver. This significantly speeds up node updates (and reduces the load on the apiserver) as the client-side throttling (which is good) does not bite us that hard.
1 parent fcf819a commit 44a5a5b

File tree

3 files changed

+31
-47
lines changed

3 files changed

+31
-47
lines changed

pkg/nfd-master/nfd-master-internal_test.go

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ func TestUpdateNodeObject(t *testing.T) {
157157
fakeMaster := newFakeMaster(fakeCli)
158158

159159
Convey("When I successfully update the node with feature labels", func() {
160-
err := fakeMaster.updateNodeObject(testNodeName, featureLabels, featureAnnotations, featureExtResources, nil)
160+
err := fakeMaster.updateNodeObject(testNode, featureLabels, featureAnnotations, featureExtResources, nil)
161161
Convey("Error is nil", func() {
162162
So(err, ShouldBeNil)
163163
})
@@ -185,19 +185,11 @@ func TestUpdateNodeObject(t *testing.T) {
185185
})
186186
})
187187

188-
Convey("When I fail to get a node while updating feature labels", func() {
189-
err := fakeMaster.updateNodeObject("non-existent-node", featureLabels, featureAnnotations, featureExtResources, nil)
190-
191-
Convey("Error is produced", func() {
192-
So(err, ShouldBeError)
193-
})
194-
})
195-
196188
Convey("When I fail to patch a node", func() {
197189
fakeCli.CoreV1().(*fakecorev1client.FakeCoreV1).PrependReactor("patch", "nodes", func(action clienttesting.Action) (handled bool, ret runtime.Object, err error) {
198190
return true, &v1.Node{}, errors.New("Fake error when patching node")
199191
})
200-
err := fakeMaster.updateNodeObject(testNodeName, nil, featureAnnotations, ExtendedResources{"": ""}, nil)
192+
err := fakeMaster.updateNodeObject(testNode, nil, featureAnnotations, ExtendedResources{"": ""}, nil)
201193

202194
Convey("Error is produced", func() {
203195
So(err, ShouldBeError)

pkg/nfd-master/nfd-master.go

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ func (m *nfdMaster) prune() error {
503503
klog.InfoS("pruning node...", "nodeName", node.Name)
504504

505505
// Prune labels and extended resources
506-
err := m.updateNodeObject(node.Name, Labels{}, Annotations{}, ExtendedResources{}, []corev1.Taint{})
506+
err := m.updateNodeObject(&node, Labels{}, Annotations{}, ExtendedResources{}, []corev1.Taint{})
507507
if err != nil {
508508
nodeUpdateFailures.Inc()
509509
return fmt.Errorf("failed to prune node %q: %v", node.Name, err)
@@ -692,8 +692,13 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
692692
klog.InfoS("gRPC SetLabels request received", "nodeName", r.NodeName)
693693
}
694694
if !m.config.NoPublish {
695+
// Fetch the node object.
696+
node, err := m.getNode(r.NodeName)
697+
if err != nil {
698+
return &pb.SetLabelsReply{}, err
699+
}
695700
// Create labels et al
696-
if err := m.refreshNodeFeatures(r.NodeName, r.GetLabels(), r.GetFeatures()); err != nil {
701+
if err := m.refreshNodeFeatures(node, r.GetLabels(), r.GetFeatures()); err != nil {
697702
nodeUpdateFailures.Inc()
698703
return &pb.SetLabelsReply{}, err
699704
}
@@ -716,15 +721,15 @@ func (m *nfdMaster) nfdAPIUpdateAllNodes() error {
716721
return nil
717722
}
718723

719-
func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
724+
func (m *nfdMaster) nfdAPIUpdateOneNode(node *corev1.Node) error {
720725
if m.nfdController == nil || m.nfdController.featureLister == nil {
721726
return nil
722727
}
723728

724-
sel := k8sLabels.SelectorFromSet(k8sLabels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodeName})
729+
sel := k8sLabels.SelectorFromSet(k8sLabels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: node.Name})
725730
objs, err := m.nfdController.featureLister.List(sel)
726731
if err != nil {
727-
return fmt.Errorf("failed to get NodeFeature resources for node %q: %w", nodeName, err)
732+
return fmt.Errorf("failed to get NodeFeature resources for node %q: %w", node.Name, err)
728733
}
729734

730735
// Sort our objects
@@ -748,7 +753,7 @@ func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
748753
return nil
749754
}
750755

751-
klog.V(1).InfoS("processing of node initiated by NodeFeature API", "nodeName", nodeName)
756+
klog.V(1).InfoS("processing of node initiated by NodeFeature API", "nodeName", node.Name)
752757

753758
features := nfdv1alpha1.NewNodeFeatureSpec()
754759

@@ -775,7 +780,7 @@ func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
775780
// Update node labels et al. This may also mean removing all NFD-owned
776781
// labels (et al.), for example in the case no NodeFeature objects are
777782
// present.
778-
if err := m.refreshNodeFeatures(nodeName, features.Labels, &features.Features); err != nil {
783+
if err := m.refreshNodeFeatures(node, features.Labels, &features.Features); err != nil {
779784
return err
780785
}
781786

@@ -820,14 +825,14 @@ func filterExtendedResource(name, value string, features *nfdv1alpha1.Features)
820825
return filteredValue, nil
821826
}
822827

823-
func (m *nfdMaster) refreshNodeFeatures(nodeName string, labels map[string]string, features *nfdv1alpha1.Features) error {
828+
func (m *nfdMaster) refreshNodeFeatures(node *corev1.Node, labels map[string]string, features *nfdv1alpha1.Features) error {
824829
if m.config.AutoDefaultNs {
825830
labels = addNsToMapKeys(labels, nfdv1alpha1.FeatureLabelNs)
826831
} else if labels == nil {
827832
labels = make(map[string]string)
828833
}
829834

830-
crLabels, crAnnotations, crExtendedResources, crTaints := m.processNodeFeatureRule(nodeName, features)
835+
crLabels, crAnnotations, crExtendedResources, crTaints := m.processNodeFeatureRule(node.Name, features)
831836

832837
// Mix in CR-originated labels
833838
maps.Copy(labels, crLabels)
@@ -849,9 +854,9 @@ func (m *nfdMaster) refreshNodeFeatures(nodeName string, labels map[string]strin
849854
taints = filterTaints(crTaints)
850855
}
851856

852-
err := m.updateNodeObject(nodeName, labels, annotations, extendedResources, taints)
857+
err := m.updateNodeObject(node, labels, annotations, extendedResources, taints)
853858
if err != nil {
854-
klog.ErrorS(err, "failed to update node", "nodeName", nodeName)
859+
klog.ErrorS(err, "failed to update node", "nodeName", node.Name)
855860
return err
856861
}
857862

@@ -861,14 +866,9 @@ func (m *nfdMaster) refreshNodeFeatures(nodeName string, labels map[string]strin
861866
// setTaints sets node taints and annotations based on the taints passed via
862867
// nodeFeatureRule custom resorce. If empty list of taints is passed, currently
863868
// NFD owned taints and annotations are removed from the node.
864-
func (m *nfdMaster) setTaints(taints []corev1.Taint, nodeName string) error {
865-
// Fetch the node object.
866-
node, err := m.getNode(nodeName)
867-
if err != nil {
868-
return err
869-
}
870-
869+
func (m *nfdMaster) setTaints(taints []corev1.Taint, node *corev1.Node) error {
871870
// De-serialize the taints annotation into corev1.Taint type for comparision below.
871+
var err error
872872
oldTaints := []corev1.Taint{}
873873
if val, ok := node.Annotations[nfdv1alpha1.NodeTaintsAnnotation]; ok {
874874
sts := strings.Split(val, ",")
@@ -905,11 +905,10 @@ func (m *nfdMaster) setTaints(taints []corev1.Taint, nodeName string) error {
905905
}
906906

907907
if taintsUpdated {
908-
err = controller.PatchNodeTaints(context.TODO(), m.k8sClient, nodeName, node, newNode)
909-
if err != nil {
908+
if err := controller.PatchNodeTaints(context.TODO(), m.k8sClient, node.Name, node, newNode); err != nil {
910909
return fmt.Errorf("failed to patch the node %v", node.Name)
911910
}
912-
klog.InfoS("updated node taints", "nodeName", nodeName)
911+
klog.InfoS("updated node taints", "nodeName", node.Name)
913912
}
914913

915914
// Update node annotation that holds the taints managed by us
@@ -926,11 +925,10 @@ func (m *nfdMaster) setTaints(taints []corev1.Taint, nodeName string) error {
926925

927926
patches := createPatches([]string{nfdv1alpha1.NodeTaintsAnnotation}, node.Annotations, newAnnotations, "/metadata/annotations")
928927
if len(patches) > 0 {
929-
err = m.patchNode(node.Name, patches)
930-
if err != nil {
928+
if err := m.patchNode(node.Name, patches); err != nil {
931929
return fmt.Errorf("error while patching node object: %w", err)
932930
}
933-
klog.V(1).InfoS("patched node annotations for taints", "nodeName", nodeName)
931+
klog.V(1).InfoS("patched node annotations for taints", "nodeName", node.Name)
934932
}
935933
return nil
936934
}
@@ -1024,13 +1022,7 @@ func (m *nfdMaster) processNodeFeatureRule(nodeName string, features *nfdv1alpha
10241022
// updateNodeObject ensures the Kubernetes node object is up to date,
10251023
// creating new labels and extended resources where necessary and removing
10261024
// outdated ones. Also updates the corresponding annotations.
1027-
func (m *nfdMaster) updateNodeObject(nodeName string, labels Labels, featureAnnotations Annotations, extendedResources ExtendedResources, taints []corev1.Taint) error {
1028-
// Get the worker node object
1029-
node, err := m.getNode(nodeName)
1030-
if err != nil {
1031-
return err
1032-
}
1033-
1025+
func (m *nfdMaster) updateNodeObject(node *corev1.Node, labels Labels, featureAnnotations Annotations, extendedResources ExtendedResources, taints []corev1.Taint) error {
10341026
annotations := make(Annotations)
10351027

10361028
// Store names of labels in an annotation
@@ -1083,7 +1075,7 @@ func (m *nfdMaster) updateNodeObject(nodeName string, labels Labels, featureAnno
10831075

10841076
// patch node status with extended resource changes
10851077
statusPatches := m.createExtendedResourcePatches(node, extendedResources)
1086-
err = m.patchNodeStatus(node.Name, statusPatches)
1078+
err := m.patchNodeStatus(node.Name, statusPatches)
10871079
if err != nil {
10881080
return fmt.Errorf("error while patching extended resources: %w", err)
10891081
}
@@ -1096,13 +1088,13 @@ func (m *nfdMaster) updateNodeObject(nodeName string, labels Labels, featureAnno
10961088

10971089
if len(patches) > 0 || len(statusPatches) > 0 {
10981090
nodeUpdates.Inc()
1099-
klog.InfoS("node updated", "nodeName", nodeName)
1091+
klog.InfoS("node updated", "nodeName", node.Name)
11001092
} else {
1101-
klog.V(1).InfoS("no updates to node", "nodeName", nodeName)
1093+
klog.V(1).InfoS("no updates to node", "nodeName", node.Name)
11021094
}
11031095

11041096
// Set taints
1105-
err = m.setTaints(taints, node.Name)
1097+
err = m.setTaints(taints, node)
11061098
if err != nil {
11071099
return err
11081100
}

pkg/nfd-master/node-updater-pool.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ func (u *nodeUpdaterPool) processNodeUpdateRequest(queue workqueue.RateLimitingI
5353
nodeUpdateRequests.Inc()
5454

5555
// Check if node exists
56-
if _, err := u.nfdMaster.getNode(nodeName); apierrors.IsNotFound(err) {
56+
if node, err := u.nfdMaster.getNode(nodeName); apierrors.IsNotFound(err) {
5757
klog.InfoS("node not found, skip update", "nodeName", nodeName)
58-
} else if err := u.nfdMaster.nfdAPIUpdateOneNode(nodeName); err != nil {
58+
} else if err := u.nfdMaster.nfdAPIUpdateOneNode(node); err != nil {
5959
if n := queue.NumRequeues(nodeName); n < 15 {
6060
klog.InfoS("retrying node update", "nodeName", nodeName, "lastError", err, "numRetries", n)
6161
} else {

0 commit comments

Comments
 (0)