Skip to content

Commit 18479f7

Browse files
ndbaker1fletcherw
authored and committed
feat(nodeadm): pass nvidia gpu startup labels to kubelet
1 parent aca08ad commit 18479f7

File tree

6 files changed

+120
-5
lines changed

6 files changed

+120
-5
lines changed

nodeadm/internal/kubelet/config.go

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,26 @@ func (ksc *kubeletConfig) withOutpostSetup(cfg *api.NodeConfig) error {
213213
return nil
214214
}
215215

216+
func (ksc *kubeletConfig) withNodeLabels(flags map[string]string, nodeLabelFuncs map[string]LabelValueFunc) {
217+
var nodeLabels []string
218+
for nodeLabelKey, nodeLabelFunc := range nodeLabelFuncs {
219+
nodeLabelValue, ok, err := nodeLabelFunc()
220+
if err != nil {
221+
zap.L().Error("Failed to get node label value", zap.String("key", nodeLabelKey), zap.Error(err))
222+
continue
223+
}
224+
if !ok {
225+
continue
226+
}
227+
nodeLabel := fmt.Sprintf("%s=%s", nodeLabelKey, nodeLabelValue)
228+
zap.L().Info("Adding node label", zap.String("label", nodeLabel))
229+
nodeLabels = append(nodeLabels, nodeLabel)
230+
}
231+
if len(nodeLabels) > 0 {
232+
flags["node-labels"] = strings.Join(nodeLabels, ",")
233+
}
234+
}
235+
216236
func (ksc *kubeletConfig) withNodeIp(cfg *api.NodeConfig, flags map[string]string) error {
217237
nodeIp, err := getNodeIp(context.TODO(), cfg, imds.DefaultClient())
218238
if err != nil {
@@ -284,14 +304,13 @@ func (ksc *kubeletConfig) withDefaultReservedResources(cfg *api.NodeConfig, reso
284304
//
285305
// TODO: revisit once the minimum supported version catches up or the container
286306
// runtime is moved to containerd 2.0
287-
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, flags map[string]string) error {
307+
func (ksc *kubeletConfig) withPodInfraContainerImage(cfg *api.NodeConfig, flags map[string]string) {
288308
// the flag is a noop on 1.29+, since the behavior was changed to use the
289309
// CRI image pinning behavior and no longer considers the flag value.
290310
// see: https://github.com/kubernetes/kubernetes/pull/118544
291311
if semver.Compare(cfg.Status.KubeletVersion, "v1.29.0") < 0 {
292312
flags["pod-infra-container-image"] = cfg.Status.Defaults.SandboxImage
293313
}
294-
return nil
295314
}
296315

297316
func (ksc *kubeletConfig) withImageServiceEndpoint(cfg *api.NodeConfig, resources system.Resources) {
@@ -312,14 +331,15 @@ func (k *kubelet) GenerateKubeletConfig(cfg *api.NodeConfig) (*kubeletConfig, er
312331
if err := kubeletConfig.withNodeIp(cfg, k.flags); err != nil {
313332
return nil, err
314333
}
315-
if err := kubeletConfig.withPodInfraContainerImage(cfg, k.flags); err != nil {
316-
return nil, err
317-
}
318334

319335
kubeletConfig.withVersionToggles(cfg)
320336
kubeletConfig.withCloudProvider(cfg, k.flags)
321337
kubeletConfig.withDefaultReservedResources(cfg, k.resources)
322338
kubeletConfig.withImageServiceEndpoint(cfg, k.resources)
339+
kubeletConfig.withNodeLabels(k.flags, map[string]LabelValueFunc{
340+
// see: https://github.com/NVIDIA/gpu-operator/commit/e25291b86cf4542ac62d8635cda4bd653c4face3
341+
"nvidia.com/gpu.present": getNvidiaGPULabel,
342+
})
323343

324344
return &kubeletConfig, nil
325345
}

nodeadm/internal/kubelet/labels.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package kubelet
2+
3+
import (
4+
"github.com/awslabs/amazon-eks-ami/nodeadm/internal/system"
5+
)
6+
7+
// LabelValueFunc computes the value for a single node label. It returns the
// label value, whether the label should be applied at all, and any error
// encountered while determining it.
type LabelValueFunc func() (string, bool, error)
8+
9+
func getNvidiaGPULabel() (string, bool, error) {
10+
ok, err := system.IsPCIVendorAttached(system.NVIDIA_VENDOR_ID)
11+
if err != nil {
12+
return "", false, err
13+
}
14+
if !ok {
15+
return "", false, nil
16+
}
17+
return "true", true, nil
18+
}

nodeadm/internal/system/devices.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package system
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"strings"
7+
)
8+
9+
// NVIDIA_VENDOR_ID is the PCI vendor id for NVIDIA devices, written in the
// textual form that IsPCIVendorAttached compares against sysfs vendor files.
const NVIDIA_VENDOR_ID = "0x10de"
10+
11+
// IsPCIVendorAttached reports whether at least one PCI device listed under
// /sys/bus/pci/devices has a vendor file whose whitespace-trimmed contents
// equal vendorId.
//
// Vendor files that cannot be read are skipped rather than treated as
// failures; an error is returned only when the glob itself fails.
func IsPCIVendorAttached(vendorId string) (bool, error) {
	matches, globErr := filepath.Glob("/sys/bus/pci/devices/*/vendor")
	if globErr != nil {
		return false, globErr
	}
	for i := range matches {
		raw, readErr := os.ReadFile(filepath.Clean(matches[i]))
		if readErr == nil && strings.TrimSpace(string(raw)) == vendorId {
			return true, nil
		}
	}
	return false, nil
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
apiVersion: node.eks.aws/v1alpha1
3+
kind: NodeConfig
4+
spec:
5+
cluster:
6+
name: my-cluster
7+
apiServerEndpoint: https://example.com
8+
certificateAuthority: Y2VydGlmaWNhdGVBdXRob3JpdHk=
9+
cidr: 10.100.0.0/16
10+
kubelet:
11+
flags:
12+
- --node-labels=foo=bar
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env bash
2+
3+
set -o errexit
4+
set -o nounset
5+
set -o pipefail
6+
7+
source /helpers.sh
8+
9+
mock::aws
10+
mock::kubelet 1.32.0
11+
wait::dbus-ready
12+
13+
nodeadm init --skip run --config-source file://config.yaml
14+
15+
assert::file-not-contains /etc/eks/kubelet/environment '--node-labels=nvidia.com/gpu.present'
16+
assert::file-contains /etc/eks/kubelet/environment '--node-labels=foo=bar'
17+
18+
# mock a pci device with the nvidia vendor-id.
19+
mock::pci-device "0x10de"
20+
21+
nodeadm init --skip run --config-source file://config.yaml
22+
23+
assert::file-contains /etc/eks/kubelet/environment '--node-labels=nvidia.com/gpu.present=true'
24+
assert::file-contains /etc/eks/kubelet/environment '--node-labels=foo=bar'

nodeadm/test/e2e/helpers.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,19 @@ function mock::setup-local-disks() {
9999
chmod +x /usr/bin/setup-local-disks
100100
}
101101

102+
function mock::pci-device() {
  # Mocks a pci device that reports the given vendor id.
  #
  # since we cannot modify the kernel sysfs paths, we bind mount our mock
  # directory on top of an existing pci device. the only requirement for the check
  # is that the vendor ID file is correct.
  #
  # NOTE: this currently only supports mocking a single device at a time.
  if [ "$#" -ne 1 ]; then
    echo "Usage: mock::pci-device VENDOR_ID"
    return 1
  fi
  local pcie_vendor=$1
  local first_device
  first_device=$(ls /sys/bus/pci/devices/ | head -n 1)
  # without an existing device entry the bind mount would cover the whole
  # devices directory instead of a single device, so bail out early.
  if [ -z "$first_device" ]; then
    echo "mock::pci-device: no existing pci device to mount over" >&2
    return 1
  fi
  pci_mock_dst=/sys/bus/pci/devices/$first_device
  pci_mock_src=$(mktemp -d)
  # quote the paths so they are never subject to word splitting or globbing.
  echo "$pcie_vendor" > "$pci_mock_src/vendor"
  mount --bind "$pci_mock_src" "$pci_mock_dst"
}
114+
102115
function wait::path-exists() {
103116
if [ "$#" -ne 1 ]; then
104117
echo "Usage: wait::path-exists TARGET_PATH"

0 commit comments

Comments
 (0)