Skip to content

Commit 80be4a6

Browse files
authored
Merge pull request #488 from ffromani/config-attributes
noderesourcetopology: overhaul Topology Manager configuration management
2 parents 16df00a + edf60d5 commit 80be4a6

File tree

73 files changed

+3929
-703
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+3929
-703
lines changed

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ require (
88
github.com/dustin/go-humanize v1.0.0
99
github.com/go-logr/logr v1.2.3
1010
github.com/google/go-cmp v0.5.8
11-
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.13
12-
github.com/k8stopologyawareschedwg/podfingerprint v0.1.1
11+
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0
12+
github.com/k8stopologyawareschedwg/podfingerprint v0.1.2
1313
github.com/patrickmn/go-cache v2.1.0+incompatible
1414
github.com/paypal/load-watcher v0.2.2
1515
github.com/spf13/pflag v1.0.5

go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -321,10 +321,10 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1
321321
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
322322
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
323323
github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
324-
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.13 h1:Y1RjPskyGMkVtNL8lq75bEdjqgq8gi+JJ1oWaz/mIJE=
325-
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.13/go.mod h1:AkACMQGiTgCt0lQw3m7TTU8PLH9lYKNK5e9DqFf5VuM=
326-
github.com/k8stopologyawareschedwg/podfingerprint v0.1.1 h1:uNEj+avp3yJkJMvkmk6iosibVauSo+owEKV2JyuKNsQ=
327-
github.com/k8stopologyawareschedwg/podfingerprint v0.1.1/go.mod h1:C23pM15t06dXg/OihGlqBvnYzLr+MXDXJ7zMfbNAyXI=
324+
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0 h1:2uCRJbv+A+fmaUaO0wLZ8oYd6cLE1dRzBQcFNxggH3s=
325+
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.0/go.mod h1:AkACMQGiTgCt0lQw3m7TTU8PLH9lYKNK5e9DqFf5VuM=
326+
github.com/k8stopologyawareschedwg/podfingerprint v0.1.2 h1:Db5KLJjPg2mKaCoeEliMlea+JMyDMWdbNPXnWbPNDyM=
327+
github.com/k8stopologyawareschedwg/podfingerprint v0.1.2/go.mod h1:C23pM15t06dXg/OihGlqBvnYzLr+MXDXJ7zMfbNAyXI=
328328
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
329329
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
330330
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=

manifests/noderesourcetopology/crd.yaml

Lines changed: 134 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
33
kind: CustomResourceDefinition
44
metadata:
55
annotations:
6-
api-approved.kubernetes.io: https://github.com/kubernetes/enhancements/pull/1870 # edited manually
6+
api-approved.kubernetes.io: https://github.com/kubernetes/enhancements/pull/1870
77
controller-gen.kubebuilder.io/version: v0.11.1
88
creationTimestamp: null
99
name: noderesourcetopologies.topology.node.k8s.io
@@ -135,4 +135,137 @@ spec:
135135
- zones
136136
type: object
137137
served: true
138+
storage: false
139+
- name: v1alpha2
140+
schema:
141+
openAPIV3Schema:
142+
description: NodeResourceTopology describes node resources and their topology.
143+
properties:
144+
apiVersion:
145+
description: 'APIVersion defines the versioned schema of this representation
146+
of an object. Servers should convert recognized schemas to the latest
147+
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
148+
type: string
149+
attributes:
150+
description: AttributeList contains an array of AttributeInfo objects.
151+
items:
152+
description: AttributeInfo contains one attribute of a Zone.
153+
properties:
154+
name:
155+
type: string
156+
value:
157+
type: string
158+
required:
159+
- name
160+
- value
161+
type: object
162+
type: array
163+
kind:
164+
description: 'Kind is a string value representing the REST resource this
165+
object represents. Servers may infer this from the endpoint the client
166+
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
167+
type: string
168+
metadata:
169+
type: object
170+
topologyPolicies:
171+
description: 'DEPRECATED (to be removed in v1beta1): use top level attributes
172+
if needed'
173+
items:
174+
type: string
175+
type: array
176+
zones:
177+
description: ZoneList contains an array of Zone objects.
178+
items:
179+
description: Zone represents a resource topology zone, e.g. socket,
180+
node, die or core.
181+
properties:
182+
attributes:
183+
description: AttributeList contains an array of AttributeInfo objects.
184+
items:
185+
description: AttributeInfo contains one attribute of a Zone.
186+
properties:
187+
name:
188+
type: string
189+
value:
190+
type: string
191+
required:
192+
- name
193+
- value
194+
type: object
195+
type: array
196+
costs:
197+
description: CostList contains an array of CostInfo objects.
198+
items:
199+
description: CostInfo describes the cost (or distance) between
200+
two Zones.
201+
properties:
202+
name:
203+
type: string
204+
value:
205+
format: int64
206+
type: integer
207+
required:
208+
- name
209+
- value
210+
type: object
211+
type: array
212+
name:
213+
type: string
214+
parent:
215+
type: string
216+
resources:
217+
description: ResourceInfoList contains an array of ResourceInfo
218+
objects.
219+
items:
220+
description: ResourceInfo contains information about one resource
221+
type.
222+
properties:
223+
allocatable:
224+
anyOf:
225+
- type: integer
226+
- type: string
227+
description: Allocatable quantity of the resource, corresponding
228+
to allocatable in node status, i.e. total amount of this
229+
resource available to be used by pods.
230+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
231+
x-kubernetes-int-or-string: true
232+
available:
233+
anyOf:
234+
- type: integer
235+
- type: string
236+
description: Available is the amount of this resource currently
237+
available for new (to be scheduled) pods, i.e. Allocatable
238+
minus the resources reserved by currently running pods.
239+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
240+
x-kubernetes-int-or-string: true
241+
capacity:
242+
anyOf:
243+
- type: integer
244+
- type: string
245+
description: Capacity of the resource, corresponding to capacity
246+
in node status, i.e. total amount of this resource that
247+
the node has.
248+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
249+
x-kubernetes-int-or-string: true
250+
name:
251+
description: Name of the resource.
252+
type: string
253+
required:
254+
- allocatable
255+
- available
256+
- capacity
257+
- name
258+
type: object
259+
type: array
260+
type:
261+
type: string
262+
required:
263+
- name
264+
- type
265+
type: object
266+
type: array
267+
required:
268+
- zones
269+
type: object
270+
served: true
138271
storage: true

pkg/noderesourcetopology/README.md

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ NodeResourceTopologyMatch plugin to work properly requires specific version of N
2929

3030
| Scheduler Plugins | NodeResourceTopology CRD version |
3131
|-------------------|----------------------------------|
32-
| master | v0.0.13 |
32+
| master | v0.1.0 |
3333
| v0.24.9 | v0.0.12 |
3434
| v0.23.10 | v0.0.12 |
3535
| v0.22.6 | v0.0.12 |
@@ -41,7 +41,7 @@ In case NodeResourceTopology CRD is being installed and advertised by [NFD](http
4141

4242
| Scheduler Plugins | NodeResourceTopology CRD version | NFD version |
4343
|-------------------|----------------------------------|-------------|
44-
| master | v0.0.13 | master |
44+
| master | v0.1.0 | master |
4545
| v0.24.9 | v0.0.12 | > v0.10.0 |
4646
| v0.23.10 | v0.0.12 | > v0.10.0 |
4747
| v0.22.6 | v0.0.12 | > v0.10.0 |
@@ -155,6 +155,27 @@ Should the cluster need to have different settings (e.g. topology manager) or NU
155155
using [affinity](https://kubernetes.io/docs/user-guide/node-selection/#node-affinity-beta-feature) or also
156156
[taints](https://kubernetes.io/docs/user-guide/node-selection/#taints-and-toleations-beta-feature).
157157

158+
#### Topology Manager configuration
159+
160+
***Target audience: developers and operators of topology updaters (NodeResourceTopology producers)***
161+
162+
In addition to logically partitioning a cluster as explained above, the topology-aware scheduler needs to know key node-specific configuration settings, such as the Topology Manager policy and scope.
163+
This data is expected to be provided as top-level `Attributes` of the NodeResourceTopology objects.
164+
165+
NodeResourceTopology producers should add top-level `Attributes` in the following format:
166+
- For `Name` and `Value` of attributes, words should be `camelCase`
167+
- The `Name` of each attribute should be **the same as the corresponding kubelet configuration option**.
168+
- example: `--topology-manager-scope` becomes `topologyManagerScope`
169+
- example: `topologyManagerPolicy` becomes `topologyManagerPolicy`
170+
- The `Value` of each attribute should be **one of the values of the corresponding kubelet configuration option, VERBATIM**.
171+
- example: `single-numa-node` becomes `single-numa-node`
172+
- Should `topologyManagerOptions` be exposed:
173+
- they should be expanded in key-value pairs, using the `String()` representation
174+
- each key-value pair should be preceded by the `topologyManagerOption` prefix
175+
- every other provision described above applies
176+
- example: the `prefer-closest-numa-nodes` option becomes `topologyManagerOptionPreferClosestNumaNodes`, accepting exactly one of `true` or `false`.
177+
- **RATIONALE**: this representation is meant to guarantee that all the Attribute Names are unique (no aliasing). Note that this is a stricter requirement than the one imposed by the Attribute representation
178+
in NRT objects, and this requirement could be lifted in the future (an upgrade path will be provided).
158179

159180
### Demo
160181

@@ -168,7 +189,7 @@ For configuring your cluster with [NFD-topology updater](https://github.com/kube
168189

169190
```yaml
170191
# Worker Node A CRD spec
171-
apiVersion: topology.node.k8s.io/v1alpha1
192+
apiVersion: topology.node.k8s.io/v1alpha2
172193
kind: NodeResourceTopology
173194
metadata:
174195
name: worker-node-A
@@ -202,7 +223,7 @@ zones:
202223

203224
```yaml
204225
# Worker Node B CRD spec
205-
apiVersion: topology.node.k8s.io/v1alpha1
226+
apiVersion: topology.node.k8s.io/v1alpha2
206227
kind: NodeResourceTopology
207228
metadata:
208229
name: worker-node-B

pkg/noderesourcetopology/cache/cache.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package cache
1919
import (
2020
corev1 "k8s.io/api/core/v1"
2121

22-
topologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
22+
topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
2323
)
2424

2525
type Interface interface {
@@ -30,7 +30,7 @@ type Interface interface {
3030
// The pod argument is used only for logging purposes.
3131
// Returns a boolean to signal the caller if the NRT data is clean. If false, then the node has foreign
3232
// Pods detected - so it should be ignored or handled differently by the caller.
33-
GetCachedNRTCopy(nodeName string, pod *corev1.Pod) (*topologyv1alpha1.NodeResourceTopology, bool)
33+
GetCachedNRTCopy(nodeName string, pod *corev1.Pod) (*topologyv1alpha2.NodeResourceTopology, bool)
3434

3535
// NodeMaybeOverReserved declares a node was filtered out for not enough resources available.
3636
// This means this node is eligible for a resync. When a node is marked discarded (dirty), it matters not

pkg/noderesourcetopology/cache/overreserve.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ import (
2828
"k8s.io/klog/v2"
2929
"k8s.io/kubernetes/pkg/scheduler/framework"
3030

31-
topologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
32-
listerv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/listers/topology/v1alpha1"
31+
topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
32+
listerv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/listers/topology/v1alpha2"
3333

3434
"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify"
3535

@@ -44,11 +44,11 @@ type OverReserve struct {
4444
// to resync nodes. See The documentation of Resync() below for more details.
4545
nodesMaybeOverreserved counter
4646
nodesWithForeignPods counter
47-
nrtLister listerv1alpha1.NodeResourceTopologyLister
47+
nrtLister listerv1alpha2.NodeResourceTopologyLister
4848
nodeIndexer NodeIndexer
4949
}
5050

51-
func NewOverReserve(lister listerv1alpha1.NodeResourceTopologyLister, indexer NodeIndexer) (*OverReserve, error) {
51+
func NewOverReserve(lister listerv1alpha2.NodeResourceTopologyLister, indexer NodeIndexer) (*OverReserve, error) {
5252
if lister == nil || indexer == nil {
5353
return nil, fmt.Errorf("nrtcache: received nil references")
5454
}
@@ -70,7 +70,7 @@ func NewOverReserve(lister listerv1alpha1.NodeResourceTopologyLister, indexer No
7070
return obj, nil
7171
}
7272

73-
func (ov *OverReserve) GetCachedNRTCopy(nodeName string, pod *corev1.Pod) (*topologyv1alpha1.NodeResourceTopology, bool) {
73+
func (ov *OverReserve) GetCachedNRTCopy(nodeName string, pod *corev1.Pod) (*topologyv1alpha2.NodeResourceTopology, bool) {
7474
ov.lock.Lock()
7575
defer ov.lock.Unlock()
7676
if ov.nodesWithForeignPods.IsSet(nodeName) {
@@ -197,7 +197,7 @@ func (ov *OverReserve) Resync() {
197197
klog.V(6).InfoS("nrtcache: resync NodeTopology cache starting", "logID", logID)
198198
defer klog.V(6).InfoS("nrtcache: resync NodeTopology cache complete", "logID", logID)
199199

200-
var nrtUpdates []*topologyv1alpha1.NodeResourceTopology
200+
var nrtUpdates []*topologyv1alpha2.NodeResourceTopology
201201
for _, nodeName := range nodeNames {
202202
nrtCandidate, err := ov.nrtLister.Get(nodeName)
203203
if err != nil {
@@ -237,7 +237,7 @@ func (ov *OverReserve) Resync() {
237237
}
238238

239239
// FlushNodes drops all the cached information about a given node, resetting its state clean.
240-
func (ov *OverReserve) FlushNodes(logID string, nrts ...*topologyv1alpha1.NodeResourceTopology) {
240+
func (ov *OverReserve) FlushNodes(logID string, nrts ...*topologyv1alpha2.NodeResourceTopology) {
241241
ov.lock.Lock()
242242
defer ov.lock.Unlock()
243243
for _, nrt := range nrts {

0 commit comments

Comments
 (0)