File tree Expand file tree Collapse file tree 2 files changed +97
-0
lines changed
Expand file tree Collapse file tree 2 files changed +97
-0
# One pod, one container asking for 2 GPUs
---
# Namespace that the ResourceClaimTemplate and Pod in this manifest are
# created in.
apiVersion: v1
kind: Namespace
metadata:
  name: gpu-test1
---
# Claim template referenced by the Pod's `twogpus` resource claim.
# It declares a single request, named `gpu`, for two devices of the
# `gpu.nvidia.com` device class.
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: gpu-test1
  name: double-gpu
spec:
  # Inner `spec` is the spec of the ResourceClaim generated from this template.
  spec:
    devices:
      requests:
        - name: gpu
          deviceClassName: gpu.nvidia.com
          # Both devices are requested through this one request entry.
          count: 2
---
# Pod with a single container that consumes the `twogpus` claim, which is
# generated from the `double-gpu` ResourceClaimTemplate.
apiVersion: v1
kind: Pod
metadata:
  namespace: gpu-test1
  name: gpu-pod
  labels:
    app: pod
spec:
  containers:
    - name: ctr0
      image: ubuntu:22.04
      command: ["bash", "-c"]
      # Runs nvidia-smi once, then idles in a backgrounded sleep; the TERM
      # trap lets the shell exit with status 0 when the container is stopped.
      args:
        - "nvidia-smi; trap 'exit 0' TERM; sleep 9999 & wait"
      resources:
        claims:
          # Refers to the entry of the same name under spec.resourceClaims.
          - name: twogpus
  resourceClaims:
    - name: twogpus
      resourceClaimTemplateName: double-gpu
  tolerations:
    # Values must not carry surrounding whitespace: the operator and effect
    # are fixed keywords, and the key is compared against the node taint key.
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule
# One pod, two containers
# Each container asking for 1 distinct GPU
3+
---
# Namespace that the ResourceClaimTemplate and Pod in this manifest are
# created in.
# NOTE(review): the one-container/two-GPU manifest in this change also uses
# namespace `gpu-test1` and pod name `gpu-pod` - confirm the two examples are
# never meant to be applied to the same cluster at the same time.
apiVersion: v1
kind: Namespace
metadata:
  name: gpu-test1
9+
---
# Claim template referenced twice by the Pod (claims `gpu0` and `gpu1`).
# It declares a single request, named `gpu`, against the `gpu.nvidia.com`
# device class; no explicit count is given.
apiVersion: resource.k8s.io/v1beta1
kind: ResourceClaimTemplate
metadata:
  namespace: gpu-test1
  name: single-gpu
spec:
  # Inner `spec` is the spec of the ResourceClaim generated from this template.
  spec:
    devices:
      requests:
        - name: gpu
          deviceClassName: gpu.nvidia.com
---
# Pod with two containers. Each container consumes its own claim (`gpu0` for
# ctr0, `gpu1` for ctr1); both claims are generated from the `single-gpu`
# ResourceClaimTemplate.
apiVersion: v1
kind: Pod
metadata:
  namespace: gpu-test1
  name: gpu-pod
  labels:
    app: pod
spec:
  containers:
    - name: ctr0
      image: ubuntu:22.04
      command: ["bash", "-c"]
      # Lists the visible GPUs once, then idles in a backgrounded sleep; the
      # TERM trap lets the shell exit with status 0 when the container stops.
      args:
        - "nvidia-smi -L; trap 'exit 0' TERM; sleep 9999 & wait"
      resources:
        claims:
          # Refers to the entry of the same name under spec.resourceClaims.
          - name: gpu0
    - name: ctr1
      image: ubuntu:22.04
      command: ["bash", "-c"]
      args:
        - "nvidia-smi -L; trap 'exit 0' TERM; sleep 9999 & wait"
      resources:
        claims:
          - name: gpu1
  resourceClaims:
    # Two separate claims built from the same template.
    - name: gpu0
      resourceClaimTemplateName: single-gpu
    - name: gpu1
      resourceClaimTemplateName: single-gpu
  tolerations:
    # Values must not carry surrounding whitespace: the operator and effect
    # are fixed keywords, and the key is compared against the node taint key.
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule
You can’t perform that action at this time.
0 commit comments