Skip to content

Commit fc70407

Browse files
authored
Add support to docker runtime for OCI access to NVIDIA GPUs (#20959)
* Add support for --device=nvidia.com/gpu=all
* Add "nvidia.com" to valid options for gpus flag in TestValidateGPUs.
1 parent f4dd7b4 commit fc70407

File tree

6 files changed

+19
-10
lines changed

6 files changed

+19
-10
lines changed

cmd/minikube/cmd/start.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,8 +1479,8 @@ func validateGPUs(value, drvName, rtime string) error {
14791479
if err := validateGPUsArch(); err != nil {
14801480
return err
14811481
}
1482-
if value != "nvidia" && value != "all" && value != "amd" {
1483-
return errors.Errorf(`The gpus flag must be passed a value of "nvidia", "amd" or "all"`)
1482+
if value != "nvidia" && value != "all" && value != "amd" && value != "nvidia.com" {
1483+
return errors.Errorf(`The gpus flag must be passed a value of "nvidia", "nvidia.com", "amd" or "all"`)
14841484
}
14851485
if drvName == constants.Docker && (rtime == constants.Docker || rtime == constants.DefaultContainerRuntime) {
14861486
return nil

cmd/minikube/cmd/start_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,7 @@ func TestValidateGPUs(t *testing.T) {
814814
{"nvidia", "docker", "", ""},
815815
{"all", "kvm", "docker", "The gpus flag can only be used with the docker driver and docker container-runtime"},
816816
{"nvidia", "docker", "containerd", "The gpus flag can only be used with the docker driver and docker container-runtime"},
817-
{"cat", "docker", "docker", `The gpus flag must be passed a value of "nvidia", "amd" or "all"`},
817+
{"cat", "docker", "docker", `The gpus flag must be passed a value of "nvidia", "nvidia.com", "amd" or "all"`},
818818
{"amd", "docker", "docker", ""},
819819
{"amd", "docker", "", ""},
820820
{"amd", "docker", "containerd", "The gpus flag can only be used with the docker driver and docker container-runtime"},

pkg/drivers/kic/oci/oci.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ func CreateContainerNode(p CreateParams) error { //nolint to suppress cyclomatic
194194
switch p.GPUs {
195195
case "all", "nvidia":
196196
runArgs = append(runArgs, "--gpus", "all", "--env", "NVIDIA_DRIVER_CAPABILITIES=all")
197+
case "nvidia.com":
198+
runArgs = append(runArgs, "--device", "nvidia.com/gpu=all")
197199
case "amd":
198200
/* https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html
199201
* "--security-opt seccomp=unconfined" is also required but included above.

pkg/minikube/cruntime/docker.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ func (r *Docker) configureDocker(driver string) error {
595595
}
596596

597597
switch r.GPUs {
598-
case "all", "nvidia":
598+
case "all", "nvidia", "nvidia.com":
599599
assets.Addons["nvidia-device-plugin"].EnableByDefault()
600600
daemonConfig.DefaultRuntime = "nvidia"
601601
runtimes := &dockerDaemonRuntimes{}

site/content/en/docs/commands/start.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ minikube start [flags]
5757
--feature-gates string A set of key=value pairs that describe feature gates for alpha/experimental features.
5858
--force Force minikube to perform possibly dangerous operations
5959
--force-systemd If set, force the container runtime to use systemd as cgroup manager. Defaults to false.
60-
-g, --gpus string Allow pods to use your GPUs. Options include: [all,nvidia,amd] (Docker driver with Docker container-runtime only)
60+
-g, --gpus string Allow pods to use your GPUs. Options include: [all,nvidia,amd,nvidia.com] (Docker driver with Docker container-runtime only)
6161
--ha Create Highly Available Multi-Control Plane Cluster with a minimum of three control-plane nodes that will also be marked for work.
6262
--host-dns-resolver Enable host resolver for NAT DNS requests (virtualbox driver only) (default true)
6363
--host-only-cidr string The CIDR to be used for the minikube VM (virtualbox driver only) (default "192.168.59.1/24")

site/content/en/docs/tutorials/nvidia.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ date: 2018-01-02
3030
sudo sysctl -p
3131
```
3232

33-
- Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) on your host machine
33+
- Install NVIDIA support using one of:
34+
- Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) on your host machine
35+
- Enable [NVIDIA CDI resources](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html) on your host machine
3436

3537
- Configure Docker:
3638
```shell
@@ -45,10 +47,15 @@ date: 2018-01-02
4547
```
4648
This will make sure minikube does any required setup or addon installs now that the nvidia runtime is available.
4749

48-
- Start minikube:
49-
```shell
50-
minikube start --driver docker --container-runtime docker --gpus all
51-
```
50+
- Start minikube with one of:
51+
- The NVIDIA Container Toolkit
52+
```shell
53+
minikube start --driver docker --container-runtime docker --gpus all
54+
```
55+
- NVIDIA CDI resources
56+
```shell
57+
minikube start --driver docker --container-runtime docker --gpus nvidia.com
58+
```
5259

5360
{{% /tab %}}
5461
{{% tab none %}}

0 commit comments

Comments
 (0)