Skip to content

Commit 376e1d9

Browse files
committed
nrt: overreserve: minimize the pod listings
In #557 we replaced the custom indexer implementation with the simpler lister implementation. The change works, but we now do unnecessary work relisting for each node which is processed, which is wasteful: even though client-go is optimized, let's avoid useless work, because we can relist the pods once per resync attempt. Signed-off-by: Francesco Romani <[email protected]>
1 parent 9701eb8 commit 376e1d9

File tree

4 files changed

+213
-50
lines changed

4 files changed

+213
-50
lines changed

pkg/noderesourcetopology/cache/overreserve.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,13 @@ func (ov *OverReserve) Resync() {
191191
return
192192
}
193193

194+
// node -> pod identifier (namespace, name)
195+
nodeToObjsMap, err := makeNodeToNamespacedNamesMap(ov.podLister, logID)
196+
if err != nil {
197+
klog.ErrorS(err, "cannot find the mapping between running pods and nodes")
198+
return
199+
}
200+
194201
klog.V(6).InfoS("nrtcache: resync NodeTopology cache starting", "logID", logID)
195202
defer klog.V(6).InfoS("nrtcache: resync NodeTopology cache complete", "logID", logID)
196203

@@ -206,6 +213,13 @@ func (ov *OverReserve) Resync() {
206213
continue
207214
}
208215

216+
objs, ok := nodeToObjsMap[nodeName]
217+
if !ok {
218+
// this really should never happen
219+
klog.V(3).InfoS("nrtcache: cannot find any pod for node", "logID", logID, "node", nodeName)
220+
continue
221+
}
222+
209223
pfpExpected := podFingerprintForNodeTopology(nrtCandidate)
210224
if pfpExpected == "" {
211225
klog.V(3).InfoS("nrtcache: missing NodeTopology podset fingerprint data", "logID", logID, "node", nodeName)
@@ -214,7 +228,7 @@ func (ov *OverReserve) Resync() {
214228

215229
klog.V(6).InfoS("nrtcache: trying to resync NodeTopology", "logID", logID, "node", nodeName, "fingerprint", pfpExpected)
216230

217-
err = checkPodFingerprintForNode(logID, ov.podLister, nodeName, pfpExpected)
231+
err = checkPodFingerprintForNode(logID, objs, nodeName, pfpExpected)
218232
if errors.Is(err, podfingerprint.ErrSignatureMismatch) {
219233
// can happen, not critical
220234
klog.V(5).InfoS("nrtcache: NodeTopology podset fingerprint mismatch", "logID", logID, "node", nodeName)

pkg/noderesourcetopology/cache/overreserve_test.go

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package cache
1818

1919
import (
2020
"encoding/json"
21-
"fmt"
2221
"reflect"
2322
"sort"
2423
"testing"
@@ -31,7 +30,6 @@ import (
3130
corev1 "k8s.io/api/core/v1"
3231
"k8s.io/apimachinery/pkg/api/resource"
3332
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34-
"k8s.io/apimachinery/pkg/labels"
3533
podlisterv1 "k8s.io/client-go/listers/core/v1"
3634
)
3735

@@ -657,39 +655,6 @@ func dumpNRT(nrtObj *topologyv1alpha2.NodeResourceTopology) string {
657655
return string(nrtJson)
658656
}
659657

660-
type fakePodLister struct {
661-
pods []*corev1.Pod
662-
err error
663-
}
664-
665-
type fakePodNamespaceLister struct {
666-
parent *fakePodLister
667-
namespace string
668-
}
669-
670-
func (fpl *fakePodLister) AddPod(pod *corev1.Pod) {
671-
fpl.pods = append(fpl.pods, pod)
672-
}
673-
674-
func (fpl *fakePodLister) List(selector labels.Selector) ([]*corev1.Pod, error) {
675-
return fpl.pods, fpl.err
676-
}
677-
678-
func (fpl *fakePodLister) Pods(namespace string) podlisterv1.PodNamespaceLister {
679-
return &fakePodNamespaceLister{
680-
parent: fpl,
681-
namespace: namespace,
682-
}
683-
}
684-
685-
func (fpnl *fakePodNamespaceLister) List(selector labels.Selector) ([]*corev1.Pod, error) {
686-
return nil, fmt.Errorf("not yet implemented")
687-
}
688-
689-
func (fpnl *fakePodNamespaceLister) Get(name string) (*corev1.Pod, error) {
690-
return nil, fmt.Errorf("not yet implemented")
691-
}
692-
693658
func MakeTopologyResInfo(name, capacity, available string) topologyv1alpha2.ResourceInfo {
694659
return topologyv1alpha2.ResourceInfo{
695660
Name: name,

pkg/noderesourcetopology/cache/store.go

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -211,12 +211,7 @@ func podFingerprintForNodeTopology(nrt *topologyv1alpha2.NodeResourceTopology) s
211211
// checkPodFingerprintForNode verifies if the given pods fingeprint (usually from NRT update) matches the
212212
// computed one using the stored data about pods running on nodes. Returns nil on success, or an error
213213
// describing the failure
214-
func checkPodFingerprintForNode(logID string, podLister podlisterv1.PodLister, nodeName, pfpExpected string) error {
215-
objs, err := getPodNamespacedNamesByNode(podLister, logID, nodeName)
216-
if err != nil {
217-
return err
218-
}
219-
214+
func checkPodFingerprintForNode(logID string, objs []types.NamespacedName, nodeName, pfpExpected string) error {
220215
st := podfingerprint.MakeStatus(nodeName)
221216
pfp := podfingerprint.NewTracingFingerprint(len(objs), &st)
222217
for _, obj := range objs {
@@ -230,24 +225,23 @@ func checkPodFingerprintForNode(logID string, podLister podlisterv1.PodLister, n
230225
return pfp.Check(pfpExpected)
231226
}
232227

233-
func getPodNamespacedNamesByNode(podLister podlisterv1.PodLister, logID, nodeName string) ([]types.NamespacedName, error) {
228+
func makeNodeToNamespacedNamesMap(podLister podlisterv1.PodLister, logID string) (map[string][]types.NamespacedName, error) {
229+
nodeToObjsMap := make(map[string][]types.NamespacedName)
234230
pods, err := podLister.List(labels.Everything())
235231
if err != nil {
236-
return []types.NamespacedName{}, err
232+
return nodeToObjsMap, err
237233
}
238-
objs := make([]types.NamespacedName, 0, len(pods))
239234
for _, pod := range pods {
240-
if pod.Spec.NodeName != nodeName {
241-
continue
242-
}
243235
if pod.Status.Phase != corev1.PodRunning {
244236
// we are interested only about nodes which consume resources
245237
continue
246238
}
247-
objs = append(objs, types.NamespacedName{
239+
nodeObjs := nodeToObjsMap[pod.Spec.NodeName]
240+
nodeObjs = append(nodeObjs, types.NamespacedName{
248241
Namespace: pod.Namespace,
249242
Name: pod.Name,
250243
})
244+
nodeToObjsMap[pod.Spec.NodeName] = nodeObjs
251245
}
252-
return objs, nil
246+
return nodeToObjsMap, nil
253247
}

pkg/noderesourcetopology/cache/store_test.go

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,20 @@ limitations under the License.
1717
package cache
1818

1919
import (
20+
"errors"
21+
"fmt"
2022
"reflect"
2123
"sort"
2224
"testing"
2325

26+
"github.com/google/go-cmp/cmp"
2427
topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
2528
corev1 "k8s.io/api/core/v1"
2629
"k8s.io/apimachinery/pkg/api/resource"
2730
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31+
"k8s.io/apimachinery/pkg/labels"
32+
"k8s.io/apimachinery/pkg/types"
33+
podlisterv1 "k8s.io/client-go/listers/core/v1"
2834

2935
"github.com/k8stopologyawareschedwg/podfingerprint"
3036
)
@@ -451,6 +457,157 @@ func TestResourceStoreUpdate(t *testing.T) {
451457
}
452458
}
453459

460+
func TestMakeNodeToNamespacedNamesMap(t *testing.T) {
461+
tcases := []struct {
462+
description string
463+
pods []*corev1.Pod
464+
err error
465+
expected map[string][]types.NamespacedName
466+
expectedErr error
467+
}{
468+
{
469+
description: "empty pod list",
470+
expected: make(map[string][]types.NamespacedName),
471+
},
472+
{
473+
description: "single pod NOT running",
474+
pods: []*corev1.Pod{
475+
{
476+
ObjectMeta: metav1.ObjectMeta{
477+
Namespace: "namespace1",
478+
Name: "pod1",
479+
},
480+
Spec: corev1.PodSpec{
481+
NodeName: "node1",
482+
},
483+
},
484+
},
485+
expected: make(map[string][]types.NamespacedName),
486+
},
487+
{
488+
description: "single pod running",
489+
pods: []*corev1.Pod{
490+
{
491+
ObjectMeta: metav1.ObjectMeta{
492+
Namespace: "namespace1",
493+
Name: "pod1",
494+
},
495+
Spec: corev1.PodSpec{
496+
NodeName: "node1",
497+
},
498+
Status: corev1.PodStatus{
499+
Phase: corev1.PodRunning,
500+
},
501+
},
502+
},
503+
expected: map[string][]types.NamespacedName{
504+
"node1": {
505+
{
506+
Namespace: "namespace1",
507+
Name: "pod1",
508+
},
509+
},
510+
},
511+
},
512+
{
513+
description: "few pods, single node running",
514+
pods: []*corev1.Pod{
515+
{
516+
ObjectMeta: metav1.ObjectMeta{
517+
Namespace: "namespace1",
518+
Name: "pod1",
519+
},
520+
Spec: corev1.PodSpec{
521+
NodeName: "node1",
522+
},
523+
Status: corev1.PodStatus{
524+
Phase: corev1.PodRunning,
525+
},
526+
},
527+
{
528+
ObjectMeta: metav1.ObjectMeta{
529+
Namespace: "namespace2",
530+
Name: "pod2",
531+
},
532+
Spec: corev1.PodSpec{
533+
NodeName: "node1",
534+
},
535+
Status: corev1.PodStatus{
536+
Phase: corev1.PodRunning,
537+
},
538+
},
539+
{
540+
ObjectMeta: metav1.ObjectMeta{
541+
Namespace: "namespace2",
542+
Name: "pod3",
543+
},
544+
Spec: corev1.PodSpec{
545+
NodeName: "node1",
546+
},
547+
Status: corev1.PodStatus{
548+
Phase: corev1.PodRunning,
549+
},
550+
},
551+
},
552+
expected: map[string][]types.NamespacedName{
553+
"node1": {
554+
{
555+
Namespace: "namespace1",
556+
Name: "pod1",
557+
},
558+
{
559+
Namespace: "namespace2",
560+
Name: "pod2",
561+
},
562+
{
563+
Namespace: "namespace2",
564+
Name: "pod3",
565+
},
566+
},
567+
},
568+
},
569+
}
570+
571+
for _, tcase := range tcases {
572+
t.Run(tcase.description, func(t *testing.T) {
573+
podLister := &fakePodLister{
574+
pods: tcase.pods,
575+
err: tcase.err,
576+
}
577+
got, err := makeNodeToNamespacedNamesMap(podLister, tcase.description)
578+
if err != tcase.expectedErr {
579+
t.Errorf("error mismatch: got %v expected %v", err, tcase.expectedErr)
580+
}
581+
if diff := cmp.Diff(got, tcase.expected); diff != "" {
582+
t.Errorf("unexpected result: %v", diff)
583+
}
584+
})
585+
}
586+
}
587+
588+
func TestCheckPodFingerprintForNode(t *testing.T) {
589+
tcases := []struct {
590+
description string
591+
objs []types.NamespacedName
592+
pfp string
593+
expectedErr error
594+
}{
595+
{
596+
description: "nil objs",
597+
expectedErr: podfingerprint.ErrMalformed,
598+
},
599+
}
600+
601+
for _, tcase := range tcases {
602+
t.Run(tcase.description, func(t *testing.T) {
603+
gotErr := checkPodFingerprintForNode("testing", tcase.objs, "test-node", tcase.pfp)
604+
if !errors.Is(gotErr, tcase.expectedErr) {
605+
t.Errorf("got error %v expected %v", gotErr, tcase.expectedErr)
606+
}
607+
})
608+
}
609+
}
610+
454611
func findResourceInfo(rinfos []topologyv1alpha2.ResourceInfo, name string) *topologyv1alpha2.ResourceInfo {
455612
for idx := 0; idx < len(rinfos); idx++ {
456613
if rinfos[idx].Name == name {
@@ -459,3 +616,36 @@ func findResourceInfo(rinfos []topologyv1alpha2.ResourceInfo, name string) *topo
459616
}
460617
return nil
461618
}
619+
620+
type fakePodLister struct {
621+
pods []*corev1.Pod
622+
err error
623+
}
624+
625+
type fakePodNamespaceLister struct {
626+
parent *fakePodLister
627+
namespace string
628+
}
629+
630+
func (fpl *fakePodLister) AddPod(pod *corev1.Pod) {
631+
fpl.pods = append(fpl.pods, pod)
632+
}
633+
634+
func (fpl *fakePodLister) List(selector labels.Selector) ([]*corev1.Pod, error) {
635+
return fpl.pods, fpl.err
636+
}
637+
638+
func (fpl *fakePodLister) Pods(namespace string) podlisterv1.PodNamespaceLister {
639+
return &fakePodNamespaceLister{
640+
parent: fpl,
641+
namespace: namespace,
642+
}
643+
}
644+
645+
func (fpnl *fakePodNamespaceLister) List(selector labels.Selector) ([]*corev1.Pod, error) {
646+
return nil, fmt.Errorf("not yet implemented")
647+
}
648+
649+
func (fpnl *fakePodNamespaceLister) Get(name string) (*corev1.Pod, error) {
650+
return nil, fmt.Errorf("not yet implemented")
651+
}

0 commit comments

Comments
 (0)