diff --git a/nodeadm/internal/containerd/config.go b/nodeadm/internal/containerd/config.go index e2339983d..8c416887c 100644 --- a/nodeadm/internal/containerd/config.go +++ b/nodeadm/internal/containerd/config.go @@ -30,11 +30,15 @@ type containerdTemplateVars struct { RuntimeBinaryName string } -func writeContainerdConfig(cfg *api.NodeConfig) error { - if err := writeBaseRuntimeSpec(cfg); err != nil { - return err - } +type modifier interface { + // Modify mutates a given containerd template configuration. + Modify(*containerdTemplateVars) + + // Matches determines whether the configurator is relevant to the current instance. + Matches(*api.NodeConfig) bool +} +func writeContainerdConfig(cfg *api.NodeConfig) error { containerdConfig, err := generateContainerdConfig(cfg) if err != nil { return err @@ -59,12 +63,17 @@ func writeContainerdConfig(cfg *api.NodeConfig) error { } func generateContainerdConfig(cfg *api.NodeConfig) ([]byte, error) { - instanceOptions := applyInstanceTypeMixins(cfg.Status.Instance.Type) - configVars := containerdTemplateVars{ SandboxImage: cfg.Status.Defaults.SandboxImage, - RuntimeBinaryName: instanceOptions.RuntimeBinaryName, - RuntimeName: instanceOptions.RuntimeName, + RuntimeBinaryName: "/usr/sbin/runc", + RuntimeName: "runc", + } + for _, configurator := range []modifier{ + NewNvidiaModifier(), + } { + if configurator.Matches(cfg) { + configurator.Modify(&configVars) + } } var buf bytes.Buffer if err := containerdConfigTemplate.Execute(&buf, configVars); err != nil { diff --git a/nodeadm/internal/containerd/daemon.go b/nodeadm/internal/containerd/daemon.go index 76f1766b1..22cb8edcb 100644 --- a/nodeadm/internal/containerd/daemon.go +++ b/nodeadm/internal/containerd/daemon.go @@ -20,6 +20,9 @@ func NewContainerdDaemon(daemonManager daemon.DaemonManager) daemon.Daemon { } func (cd *containerd) Configure(c *api.NodeConfig) error { + if err := writeBaseRuntimeSpec(c); err != nil { + return err + } return writeContainerdConfig(c) } diff --git a/nodeadm/internal/containerd/nvidia.go b/nodeadm/internal/containerd/nvidia.go new file mode 100644 index 000000000..fcb560e48 --- /dev/null +++ b/nodeadm/internal/containerd/nvidia.go @@ -0,0 +1,65 @@ +package containerd + +import ( + "os" + "slices" + "strings" + + "github.com/awslabs/amazon-eks-ami/nodeadm/internal/api" + "go.uber.org/zap" +) + +type nvidiaModifier struct { + pcieDevicesPath string +} + +func NewNvidiaModifier() *nvidiaModifier { + return &nvidiaModifier{ + pcieDevicesPath: "/proc/bus/pci/devices", + } +} + +func (m *nvidiaModifier) Matches(cfg *api.NodeConfig) bool { + return m.matchesInstanceType(cfg.Status.Instance.Type) || m.matchesPCIeVendor() +} + +func (*nvidiaModifier) Modify(ctrdTemplate *containerdTemplateVars) { + zap.L().Info("Configuring NVIDIA runtime..") + ctrdTemplate.RuntimeName = "nvidia" + ctrdTemplate.RuntimeBinaryName = "/usr/bin/nvidia-container-runtime" +} + +var nvidiaInstanceFamilies = []string{ + "p3", "p3dn", + "p4d", "p4de", + "p5", "p5e", "p5en", + "g4", "g4dn", + "g5", "g5g", + "g6", "g6e", +} + +// TODO: deprecate to avoid manual instance type tracking. +func (*nvidiaModifier) matchesInstanceType(instanceType string) bool { + family := strings.Split(instanceType, ".")[0] + return slices.Contains(nvidiaInstanceFamilies, family) +} + +func (m *nvidiaModifier) matchesPCIeVendor() bool { + devices, err := os.ReadFile(m.pcieDevicesPath) + if err != nil { + zap.L().Error("Failed to read PCIe devices", zap.Error(err)) + return false + } + // The contents of '/proc/bus/pci/devices' looks like the following, where + // the last column contains the vendor name if present. + // + // something like the following: + // + // 0018 1d0f1111 0 c1000008 0 0 0 0 0 c0002 400000 0 0 0 0 0 20000 + // 0020 1d0f8061 b c1508000 0 0 0 0 0 0 4000 0 0 0 0 0 0 nvme + // 0028 1d0fec20 0 c1504000 0 c1400008 0 0 0 0 4000 0 100000 0 0 0 0 ena + // 00f0 10de1eb8 a c0000000 44000000c 0 45000000c 0 0 0 1000000 10000000 0 2000000 0 0 0 nvidia + // 00f8 1d0fcd01 0 c1500000 0 c150c008 0 0 0 0 4000 0 2000 0 0 0 0 nvme + // 0030 1d0fec20 0 c1510000 0 c1600008 0 0 0 0 4000 0 100000 0 0 0 0 ena + return strings.Contains(string(devices), "nvidia") +} diff --git a/nodeadm/internal/containerd/nvidia_test.go b/nodeadm/internal/containerd/nvidia_test.go new file mode 100644 index 000000000..140388907 --- /dev/null +++ b/nodeadm/internal/containerd/nvidia_test.go @@ -0,0 +1,47 @@ +package containerd + +import ( + "os" + "path/filepath" + "testing" + + "github.com/awslabs/amazon-eks-ami/nodeadm/internal/api" + "github.com/stretchr/testify/assert" +) + +func TestNvidiaConfigurator(t *testing.T) { + + t.Run("IsNvidiaUsingInstanceType", func(t *testing.T) { + configurator := nvidiaModifier{} + template := containerdTemplateVars{} + assert.True(t, configurator.Matches(nvidiaInstanceTypeNodeConfig("p5.48xlarge"))) + configurator.Modify(&template) + assert.Equal(t, "nvidia", template.RuntimeName) + assert.Equal(t, "/usr/bin/nvidia-container-runtime", template.RuntimeBinaryName) + }) + + t.Run("IsNvidiaUsingPCIe", func(t *testing.T) { + configurator := nvidiaModifier{pcieDevicesPath: filepath.Join(t.TempDir(), "pcie-devices")} + os.WriteFile(configurator.pcieDevicesPath, []byte("nvidia"), 0777) + template := containerdTemplateVars{} + assert.True(t, configurator.Matches(nvidiaInstanceTypeNodeConfig("xxx.xxxxx"))) + configurator.Modify(&template) + assert.Equal(t, "nvidia", template.RuntimeName) + assert.Equal(t, "/usr/bin/nvidia-container-runtime", template.RuntimeBinaryName) + }) + + t.Run("IsNotNvidia", func(t *testing.T) { + configurator := nvidiaModifier{} + assert.False(t, configurator.Matches(nvidiaInstanceTypeNodeConfig("m5.large"))) + }) +} + +func nvidiaInstanceTypeNodeConfig(instanceType string) *api.NodeConfig { + return &api.NodeConfig{ + Status: api.NodeConfigStatus{ + Instance: api.InstanceDetails{ + Type: instanceType, + }, + }, + } +} diff --git a/nodeadm/internal/containerd/runtime_config.go b/nodeadm/internal/containerd/runtime_config.go deleted file mode 100644 index 99a38ccc2..000000000 --- a/nodeadm/internal/containerd/runtime_config.go +++ /dev/null @@ -1,65 +0,0 @@ -package containerd - -import ( - "slices" - "strings" - - "go.uber.org/zap" -) - -type instanceOptions struct { - RuntimeName string - RuntimeBinaryName string -} - -type instanceTypeMixin struct { - instanceFamilies []string - apply func() instanceOptions -} - -func (m *instanceTypeMixin) matches(instanceType string) bool { - instanceFamily := strings.Split(instanceType, ".")[0] - return slices.Contains(m.instanceFamilies, instanceFamily) -} - -var ( - // TODO: fetch this list dynamically - nvidiaInstances = []string{"p3", "p3dn", "p4d", "p4de", "p5", "p5e", "p5en", "g4", "g4dn", "g5", "g6", "g6e"} - NvidiaInstanceTypeMixin = instanceTypeMixin{ - instanceFamilies: nvidiaInstances, - apply: applyNvidia, - } - - mixins = []instanceTypeMixin{ - NvidiaInstanceTypeMixin, - } -) - -const nvidiaRuntimeName = "nvidia" -const nvidiaRuntimeBinaryName = "/usr/bin/nvidia-container-runtime" -const defaultRuntimeName = "runc" -const defaultRuntimeBinaryName = "/usr/sbin/runc" - -// applyInstanceTypeMixins adds the needed OCI hook options to containerd config.toml -// based on the instance family -func applyInstanceTypeMixins(instanceType string) instanceOptions { - for _, mixin := range mixins { - if mixin.matches(instanceType) { - return mixin.apply() - } - } - zap.L().Info("No instance specific containerd runtime configuration needed..", zap.String("instanceType", instanceType)) - return applyDefault() -} - -// applyNvidia adds the needed NVIDIA containerd options -func applyNvidia() instanceOptions { - zap.L().Info("Configuring NVIDIA runtime..") - return instanceOptions{RuntimeName: nvidiaRuntimeName, RuntimeBinaryName: nvidiaRuntimeBinaryName} -} - -// applyDefault adds the default runc containerd options -func applyDefault() instanceOptions { - zap.L().Info("Configuring default runtime..") - return instanceOptions{RuntimeName: defaultRuntimeName, RuntimeBinaryName: defaultRuntimeBinaryName} -} diff --git a/nodeadm/internal/containerd/runtime_config_test.go b/nodeadm/internal/containerd/runtime_config_test.go deleted file mode 100644 index 10d7f988b..000000000 --- a/nodeadm/internal/containerd/runtime_config_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package containerd - -import ( - "reflect" - "testing" -) - -func TestApplyInstanceTypeMixins(t *testing.T) { - - var nvidiaExpectedOutput = instanceOptions{RuntimeName: "nvidia", RuntimeBinaryName: "/usr/bin/nvidia-container-runtime"} - var neuronExpectedOutput = instanceOptions{RuntimeName: "runc", RuntimeBinaryName: "/usr/sbin/runc"} - var nonAcceleratedExpectedOutput = instanceOptions{RuntimeName: "runc", RuntimeBinaryName: "/usr/sbin/runc"} - - var tests = []struct { - name string - instanceType string - expectedOutput instanceOptions - }{ - {name: "nvidia_test", instanceType: "p5.xlarge", expectedOutput: nvidiaExpectedOutput}, - {name: "neuron_test", instanceType: "inf2.xlarge", expectedOutput: neuronExpectedOutput}, - // non accelerated instance - {name: "non_accelerated_test", instanceType: "m5.xlarge", expectedOutput: nonAcceleratedExpectedOutput}, - } - for _, test := range tests { - expected := applyInstanceTypeMixins(test.instanceType) - - if !reflect.DeepEqual(expected, test.expectedOutput) { - t.Fatalf("unexpected output in test case %s: %s, expecting: %s", test.name, expected, test.expectedOutput) - } - } -}