Skip to content

Commit a6f5ea4

Browse files
authored
Merge pull request #504 from PiotrProkop/nrt-nodelocking
Add new NRT cache type DiscardReservedNodes
2 parents 726759a + 7882af7 commit a6f5ea4

File tree

14 files changed

+335
-0
lines changed

14 files changed

+335
-0
lines changed

apis/config/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ type NodeResourceTopologyMatchArgs struct {
152152
ScoringStrategy ScoringStrategy
153153
// If > 0, enables the caching facilities of the reserve plugin - which must be enabled
154154
CacheResyncPeriodSeconds int64
155+
// if set to true, exclude node from scheduling if there are any reserved pods for given node
156+
// this option takes precedence over CacheResyncPeriodSeconds
157+
// if DiscardReservedNodes is enabled, CacheResyncPeriodSeconds option is noop
158+
DiscardReservedNodes bool
155159
}
156160

157161
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

apis/config/v1/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ type NodeResourceTopologyMatchArgs struct {
149149
ScoringStrategy *ScoringStrategy `json:"scoringStrategy,omitempty"`
150150
// If > 0, enables the caching facilities of the reserve plugin - which must be enabled
151151
CacheResyncPeriodSeconds *int64 `json:"cacheResyncPeriodSeconds,omitempty"`
152+
// if set to true, exclude node from scheduling if there are any reserved pods for given node
153+
// this option takes precedence over CacheResyncPeriodSeconds
154+
// if DiscardReservedNodes is enabled, CacheResyncPeriodSeconds option is noop
155+
DiscardReservedNodes bool `json:"discardReservedNodes,omitempty"`
152156
}
153157

154158
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

apis/config/v1/zz_generated.conversion.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/config/v1beta2/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ type NodeResourceTopologyMatchArgs struct {
148148
// implicitely enables the caching. If zero, disables the caching entirely.
149149
// If the cache is enabled, the Reserve plugin must be enabled.
150150
CacheResyncPeriodSeconds *int64 `json:"cacheResyncPeriodSeconds,omitempty"`
151+
// if set to true, exclude node from scheduling if there are any reserved pods for given node
152+
// this option takes precedence over CacheResyncPeriodSeconds
153+
// if DiscardReservedNodes is enabled, CacheResyncPeriodSeconds option is noop
154+
DiscardReservedNodes bool `json:"discardReservedNodes,omitempty"`
151155
}
152156

153157
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

apis/config/v1beta2/zz_generated.conversion.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/config/v1beta3/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,10 @@ type NodeResourceTopologyMatchArgs struct {
152152
// implicitely enables the caching. If zero, disables the caching entirely.
153153
// If the cache is enabled, the Reserve plugin must be enabled.
154154
CacheResyncPeriodSeconds *int64 `json:"cacheResyncPeriodSeconds,omitempty"`
155+
// if set to true, exclude node from scheduling if there are any reserved pods for given node
156+
// this option takes precedence over CacheResyncPeriodSeconds
157+
// if DiscardReservedNodes is enabled, CacheResyncPeriodSeconds option is noop
158+
DiscardReservedNodes bool `json:"discardReservedNodes,omitempty"`
155159
}
156160

157161
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

apis/config/v1beta3/zz_generated.conversion.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/noderesourcetopology/cache/cache.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,10 @@ type Interface interface {
5656

5757
// UnreserveNodeResources decrement from the node assumed resources the resources required by the given pod.
5858
UnreserveNodeResources(nodeName string, pod *corev1.Pod)
59+
60+
// PostBind is called after a pod is successfully bound. These plugins are
61+
// informational. A common application of this extension point is for cleaning
62+
// up. If a plugin needs to clean-up its state after a pod is scheduled and
63+
// bound, PostBind is the extension point that it should register.
64+
PostBind(nodeName string, pod *corev1.Pod)
5965
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
Copyright 2023 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package cache
18+
19+
import (
20+
"sync"
21+
22+
corev1 "k8s.io/api/core/v1"
23+
"k8s.io/apimachinery/pkg/types"
24+
"k8s.io/klog/v2"
25+
26+
topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
27+
listerv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/listers/topology/v1alpha2"
28+
)
29+
30+
// DiscardReserved is intended to solve similiar problem as Overreserve Cache,
31+
// which is to minimize amount of incorrect scheduling decisions based on stale NRT data.
32+
// Unfortunately Overreserve cache only works for single-numa-node Topology Manager policy.
33+
// Dis tries to minimize amount of Admission Errors and non-optimal placement
34+
// when NodeResourceTopologyMatch plugin is used to schedule PODs requesting resources from multiple NUMA domains.
35+
// There are scenarios where using DiscardReserved won't mitigate drawbacks of using Passthrough cache.
36+
// NRT update is expected once PostBind triggers, but there's no guarantee about when this will happen.
37+
// In cases like:
38+
// - NFD(or any other component that advertises NRT) can be nonfunctional
39+
// - network can be slow
40+
// - Pod being scheduled after PostBind trigger and before NRT update
41+
// in those cases DiscardReserved cache will act same as Passthrough cache
42+
type DiscardReserved struct {
43+
rMutex sync.RWMutex
44+
reservationMap map[string]map[types.UID]bool // Key is NodeName, value is Pod UID : reserved status
45+
lister listerv1alpha2.NodeResourceTopologyLister
46+
}
47+
48+
func NewDiscardReserved(lister listerv1alpha2.NodeResourceTopologyLister) Interface {
49+
return &DiscardReserved{
50+
lister: lister,
51+
reservationMap: make(map[string]map[types.UID]bool),
52+
}
53+
}
54+
55+
func (pt *DiscardReserved) GetCachedNRTCopy(nodeName string, _ *corev1.Pod) (*topologyv1alpha2.NodeResourceTopology, bool) {
56+
pt.rMutex.RLock()
57+
defer pt.rMutex.RUnlock()
58+
if t, ok := pt.reservationMap[nodeName]; ok {
59+
if len(t) > 0 {
60+
return nil, false
61+
}
62+
}
63+
64+
nrt, err := pt.lister.Get(nodeName)
65+
if err != nil {
66+
return nil, false
67+
}
68+
return nrt, true
69+
}
70+
71+
func (pt *DiscardReserved) NodeMaybeOverReserved(nodeName string, pod *corev1.Pod) {}
72+
func (pt *DiscardReserved) NodeHasForeignPods(nodeName string, pod *corev1.Pod) {}
73+
74+
func (pt *DiscardReserved) ReserveNodeResources(nodeName string, pod *corev1.Pod) {
75+
klog.V(5).InfoS("nrtcache NRT Reserve", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName)
76+
pt.rMutex.Lock()
77+
defer pt.rMutex.Unlock()
78+
79+
if pt.reservationMap[nodeName] == nil {
80+
pt.reservationMap[nodeName] = make(map[types.UID]bool)
81+
}
82+
pt.reservationMap[nodeName][pod.GetUID()] = true
83+
}
84+
85+
func (pt *DiscardReserved) UnreserveNodeResources(nodeName string, pod *corev1.Pod) {
86+
klog.V(5).InfoS("nrtcache NRT Unreserve", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName)
87+
88+
pt.removeReservationForNode(nodeName, pod)
89+
}
90+
91+
// PostBind is invoked to cleanup reservationMap
92+
func (pt *DiscardReserved) PostBind(nodeName string, pod *corev1.Pod) {
93+
klog.V(5).InfoS("nrtcache NRT PostBind", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName)
94+
95+
pt.removeReservationForNode(nodeName, pod)
96+
}
97+
98+
func (pt *DiscardReserved) removeReservationForNode(nodeName string, pod *corev1.Pod) {
99+
pt.rMutex.Lock()
100+
defer pt.rMutex.Unlock()
101+
102+
delete(pt.reservationMap[nodeName], pod.GetUID())
103+
}
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/*
2+
Copyright 2023 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package cache
18+
19+
import (
20+
"reflect"
21+
"testing"
22+
23+
topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
24+
faketopologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned/fake"
25+
topologyinformers "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/informers/externalversions"
26+
corev1 "k8s.io/api/core/v1"
27+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28+
"k8s.io/apimachinery/pkg/types"
29+
)
30+
31+
func TestDiscardReservedNodesGetNRTCopy(t *testing.T) {
32+
fakeClient := faketopologyv1alpha2.NewSimpleClientset()
33+
fakeInformer := topologyinformers.NewSharedInformerFactory(fakeClient, 0).Topology().V1alpha2().NodeResourceTopologies()
34+
35+
nrtCache := NewDiscardReserved(fakeInformer.Lister())
36+
var nrtObj *topologyv1alpha2.NodeResourceTopology
37+
nrtObj, _ = nrtCache.GetCachedNRTCopy("node1", &corev1.Pod{})
38+
if nrtObj != nil {
39+
t.Fatalf("non-empty object from empty cache")
40+
}
41+
42+
nodeTopologies := []*topologyv1alpha2.NodeResourceTopology{
43+
{
44+
ObjectMeta: metav1.ObjectMeta{Name: "node1"},
45+
TopologyPolicies: []string{string(topologyv1alpha2.SingleNUMANodeContainerLevel)},
46+
Zones: topologyv1alpha2.ZoneList{
47+
{
48+
Name: "node-0",
49+
Type: "Node",
50+
Resources: topologyv1alpha2.ResourceInfoList{
51+
MakeTopologyResInfo(cpu, "20", "4"),
52+
MakeTopologyResInfo(memory, "8Gi", "8Gi"),
53+
MakeTopologyResInfo(nicResourceName, "30", "10"),
54+
},
55+
},
56+
{
57+
Name: "node-1",
58+
Type: "Node",
59+
Resources: topologyv1alpha2.ResourceInfoList{
60+
MakeTopologyResInfo(cpu, "30", "8"),
61+
MakeTopologyResInfo(memory, "8Gi", "8Gi"),
62+
MakeTopologyResInfo(nicResourceName, "30", "10"),
63+
},
64+
},
65+
},
66+
},
67+
}
68+
for _, obj := range nodeTopologies {
69+
fakeInformer.Informer().GetStore().Update(obj)
70+
}
71+
72+
nrtObj, ok := nrtCache.GetCachedNRTCopy("node1", &corev1.Pod{})
73+
if !reflect.DeepEqual(nrtObj, nodeTopologies[0]) {
74+
t.Fatalf("unexpected object from cache\ngot: %s\nexpected: %s\n", dumpNRT(nrtObj), dumpNRT(nodeTopologies[0]))
75+
}
76+
77+
if !ok {
78+
t.Fatalf("expecting GetCachedNRTCopy to return true not false")
79+
}
80+
}
81+
82+
func TestDiscardReservedNodesGetNRTCopyFails(t *testing.T) {
83+
nrtCache := DiscardReserved{
84+
reservationMap: map[string]map[types.UID]bool{
85+
"node1": {
86+
types.UID("some-uid"): true,
87+
},
88+
},
89+
}
90+
91+
nrtObj, ok := nrtCache.GetCachedNRTCopy("node1", &corev1.Pod{})
92+
if ok {
93+
t.Fatal("expected false\ngot true\n")
94+
}
95+
if nrtObj != nil {
96+
t.Fatalf("non-empty object")
97+
}
98+
}
99+
100+
func TestDiscardReservedNodesReserveNodeResources(t *testing.T) {
101+
nrtCache := DiscardReserved{
102+
reservationMap: make(map[string]map[types.UID]bool),
103+
}
104+
105+
nrtCache.ReserveNodeResources("node1", &corev1.Pod{
106+
ObjectMeta: metav1.ObjectMeta{
107+
Name: "pod",
108+
Namespace: "test",
109+
UID: "some-uid",
110+
},
111+
})
112+
nodePods, ok := nrtCache.reservationMap["node1"]
113+
if !ok {
114+
t.Fatal("expected reservationMap to have entry for node1")
115+
}
116+
117+
if len(nodePods) != 1 {
118+
t.Fatalf("expected reservationMap entry for node1 to have len 1 not: %d", len(nodePods))
119+
}
120+
121+
pod, ok := nodePods[types.UID("some-uid")]
122+
if !ok {
123+
t.Fatal("expected reservationMap for node1 to have some-uid")
124+
}
125+
126+
if !pod {
127+
t.Fatal("expected reservationMap entry node1 to have some-uid set to true not false")
128+
}
129+
}
130+
131+
func TestDiscardReservedNodesRemoveReservationForNode(t *testing.T) {
132+
nrtCache := DiscardReserved{
133+
reservationMap: make(map[string]map[types.UID]bool),
134+
}
135+
136+
pod := &corev1.Pod{
137+
ObjectMeta: metav1.ObjectMeta{
138+
Name: "pod",
139+
Namespace: "test",
140+
UID: "some-uid",
141+
},
142+
}
143+
144+
nrtCache.ReserveNodeResources("node1", pod)
145+
nodePods, ok := nrtCache.reservationMap["node1"]
146+
if !ok {
147+
t.Fatal("expected reservationMap to have entry for node1")
148+
}
149+
150+
if len(nodePods) != 1 {
151+
t.Fatalf("expected reservationMap entry for node1 to have len 1 not: %d", len(nodePods))
152+
}
153+
154+
podExists, ok := nodePods[types.UID("some-uid")]
155+
if !ok {
156+
t.Fatal("expected reservationMap for node1 to have some-uid")
157+
}
158+
159+
if !podExists {
160+
t.Fatal("expected reservationMap entry node1 to have some-uid set to true not false")
161+
}
162+
163+
nrtCache.removeReservationForNode("node1", pod)
164+
nodePods, ok = nrtCache.reservationMap["node1"]
165+
if !ok {
166+
t.Fatal("expected reservationMap to have entry for node1")
167+
}
168+
169+
if len(nodePods) != 0 {
170+
t.Fatalf("expected reservationMap entry for node1 to have len 0 not: %d", len(nodePods))
171+
}
172+
}

0 commit comments

Comments
 (0)