Skip to content

Commit c886e01

Browse files
authored
Fix Nvidia device plugin that is running on CPU (#78)
* Fix Nvidia device plugin that is running on CPU

  **Description** This is for GitHub issue #48.

  **Testing Done** Verified that no nvidia-device-plugin pod is running on CPU nodes.

* Update nvidia-device-plugin instance labels and reorder HP CLI instance allowlist
1 parent 3cbd56e commit c886e01

File tree

2 files changed

+69
-35
lines changed

2 files changed

+69
-35
lines changed

helm_chart/HyperPodHelmChart/values.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,40 @@ nvidia-device-plugin:
138138
requiredDuringSchedulingIgnoredDuringExecution:
139139
nodeSelectorTerms:
140140
- matchExpressions:
141+
- key: node.kubernetes.io/instance-type
142+
operator: In
143+
values:
144+
- ml.g5.xlarge
145+
- ml.g5.2xlarge
146+
- ml.g5.4xlarge
147+
- ml.g5.8xlarge
148+
- ml.g5.12xlarge
149+
- ml.g5.16xlarge
150+
- ml.g5.24xlarge
151+
- ml.g5.48xlarge
152+
- ml.g6.xlarge
153+
- ml.g6.2xlarge
154+
- ml.g6.4xlarge
155+
- ml.g6.8xlarge
156+
- ml.g6.16xlarge
157+
- ml.g6.12xlarge
158+
- ml.g6.24xlarge
159+
- ml.g6.48xlarge
160+
- ml.g6e.xlarge
161+
- ml.g6e.2xlarge
162+
- ml.g6e.4xlarge
163+
- ml.g6e.8xlarge
164+
- ml.g6e.12xlarge
165+
- ml.g6e.16xlarge
166+
- ml.g6e.24xlarge
167+
- ml.g6e.48xlarge
168+
- ml.gr6.4xlarge
169+
- ml.gr6.8xlarge
170+
- ml.p4d.24xlarge
171+
- ml.p4de.24xlarge
172+
- ml.p5.48xlarge
173+
- ml.p5e.48xlarge
174+
- ml.p5en.48xlarge
141175
# nvidia plugin needs at least one node selector. Below label exists for all hyperpod nodes
142176
- key: kubernetes.io/os
143177
operator: In

src/hyperpod_cli/constants/hyperpod_instance_types.py

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,6 @@
1414

1515

1616
class HyperpodInstanceType(Enum):
17-
ML_P4D_24XLARGE = "ml.p4d.24xlarge"
18-
ML_P4DE_24XLARGE = "ml.p4de.24xlarge"
19-
ML_P5_48XLARGE = "ml.p5.48xlarge"
20-
ML_TRN1_32XLARGE = "ml.trn1.32xlarge"
21-
ML_TRN1N_32XLARGE = "ml.trn1n.32xlarge"
22-
ML_G5_XLARGE = "ml.g5.xlarge"
23-
ML_G5_2XLARGE = "ml.g5.2xlarge"
24-
ML_G5_4XLARGE = "ml.g5.4xlarge"
25-
ML_G5_8XLARGE = "ml.g5.8xlarge"
26-
ML_G5_12XLARGE = "ml.g5.12xlarge"
27-
ML_G5_16XLARGE = "ml.g5.16xlarge"
28-
ML_G5_24XLARGE = "ml.g5.24xlarge"
29-
ML_G5_48XLARGE = "ml.g5.48xlarge"
3017
ML_C5_LARGE = "ml.c5.large"
3118
ML_C5_XLARGE = "ml.c5.xlarge"
3219
ML_C5_2XLARGE = "ml.c5.2xlarge"
@@ -40,6 +27,32 @@ class HyperpodInstanceType(Enum):
4027
ML_C5N_4XLARGE = "ml.c5n.4xlarge"
4128
ML_C5N_9XLARGE = "ml.c5n.9xlarge"
4229
ML_C5N_18XLARGE = "ml.c5n.18xlarge"
30+
ML_G5_XLARGE = "ml.g5.xlarge"
31+
ML_G5_2XLARGE = "ml.g5.2xlarge"
32+
ML_G5_4XLARGE = "ml.g5.4xlarge"
33+
ML_G5_8XLARGE = "ml.g5.8xlarge"
34+
ML_G5_12XLARGE = "ml.g5.12xlarge"
35+
ML_G5_16XLARGE = "ml.g5.16xlarge"
36+
ML_G5_24XLARGE = "ml.g5.24xlarge"
37+
ML_G5_48XLARGE = "ml.g5.48xlarge"
38+
ML_G6_XLARGE = "ml.g6.xlarge"
39+
ML_G6_2XLARGE = "ml.g6.2xlarge"
40+
ML_G6_4XLARGE = "ml.g6.4xlarge"
41+
ML_G6_8XLARGE = "ml.g6.8xlarge"
42+
ML_G6_12XLARGE = "ml.g6.12xlarge"
43+
ML_G6_16XLARGE = "ml.g6.16xlarge"
44+
ML_G6_24XLARGE = "ml.g6.24xlarge"
45+
ML_G6_48XLARGE = "ml.g6.48xlarge"
46+
ML_G6E_XLARGE = "ml.g6e.xlarge"
47+
ML_G6E_2XLARGE = "ml.g6e.2xlarge"
48+
ML_G6E_4XLARGE = "ml.g6e.4xlarge"
49+
ML_G6E_8XLARGE = "ml.g6e.8xlarge"
50+
ML_G6E_12XLARGE = "ml.g6e.12xlarge"
51+
ML_G6E_16XLARGE = "ml.g6e.16xlarge"
52+
ML_G6E_24XLARGE = "ml.g6e.24xlarge"
53+
ML_G6E_48XLARGE = "ml.g6e.48xlarge"
54+
ML_GR6_4XLARGE = "ml.gr6.4xlarge"
55+
ML_GR6_8XLARGE = "ml.gr6.8xlarge"
4356
ML_I3EN_LARGE = "ml.i3en.large"
4457
ML_I3EN_XLARGE = "ml.i3en.xlarge"
4558
ML_I3EN_2XLARGE = "ml.i3en.2xlarge"
@@ -64,28 +77,9 @@ class HyperpodInstanceType(Enum):
6477
ML_M7I_16XLARGE = "ml.m7i.16xlarge"
6578
ML_M7I_24XLARGE = "ml.m7i.24xlarge"
6679
ML_M7I_48XLARGE = "ml.m7i.48xlarge"
67-
ML_T3_MEDIUM = "ml.t3.medium"
68-
ML_T3_LARGE = "ml.t3.large"
69-
ML_T3_XLARGE = "ml.t3.xlarge"
70-
ML_T3_2XLARGE = "ml.t3.2xlarge"
71-
ML_G6_XLARGE = "ml.g6.xlarge"
72-
ML_G6_2XLARGE = "ml.g6.2xlarge"
73-
ML_G6_4XLARGE = "ml.g6.4xlarge"
74-
ML_G6_8XLARGE = "ml.g6.8xlarge"
75-
ML_G6_16XLARGE = "ml.g6.16xlarge"
76-
ML_G6_12XLARGE = "ml.g6.12xlarge"
77-
ML_G6_24XLARGE = "ml.g6.24xlarge"
78-
ML_G6_48XLARGE = "ml.g6.48xlarge"
79-
ML_GR6_4XLARGE = "ml.gr6.4xlarge"
80-
ML_GR6_8XLARGE = "ml.gr6.8xlarge"
81-
ML_G6E_XLARGE = "ml.g6e.xlarge"
82-
ML_G6E_2XLARGE = "ml.g6e.2xlarge"
83-
ML_G6E_4XLARGE = "ml.g6e.4xlarge"
84-
ML_G6E_8XLARGE = "ml.g6e.8xlarge"
85-
ML_G6E_16XLARGE = "ml.g6e.16xlarge"
86-
ML_G6E_12XLARGE = "ml.g6e.12xlarge"
87-
ML_G6E_24XLARGE = "ml.g6e.24xlarge"
88-
ML_G6E_48XLARGE = "ml.g6e.48xlarge"
80+
ML_P4D_24XLARGE = "ml.p4d.24xlarge"
81+
ML_P4DE_24XLARGE = "ml.p4de.24xlarge"
82+
ML_P5_48XLARGE = "ml.p5.48xlarge"
8983
ML_P5E_48XLARGE = "ml.p5e.48xlarge"
9084
ML_P5EN_48XLARGE = "ml.p5en.48xlarge"
9185
ML_R7I_LARGE = "ml.r7i.large"
@@ -97,4 +91,10 @@ class HyperpodInstanceType(Enum):
9791
ML_R7I_16XLARGE = "ml.r7i.16xlarge"
9892
ML_R7I_24XLARGE = "ml.r7i.24xlarge"
9993
ML_R7I_48XLARGE = "ml.r7i.48xlarge"
94+
ML_T3_MEDIUM = "ml.t3.medium"
95+
ML_T3_LARGE = "ml.t3.large"
96+
ML_T3_XLARGE = "ml.t3.xlarge"
97+
ML_T3_2XLARGE = "ml.t3.2xlarge"
98+
ML_TRN1_32XLARGE = "ml.trn1.32xlarge"
99+
ML_TRN1N_32XLARGE = "ml.trn1n.32xlarge"
100100
ML_TRN2_48XLARGE = "ml.trn2.48xlarge"

0 commit comments

Comments
 (0)