Skip to content

Commit 642b560

Browse files
committed
Corrections
1 parent e9725a2 commit 642b560

File tree

3 files changed

+21
-25
lines changed

3 files changed

+21
-25
lines changed

helm_chart/HyperPodHelmChartForRIG/Chart.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,6 @@ dependencies:
3939
version: "0.1.0"
4040
repository: "file://charts/neuron-device-plugin"
4141
condition: neuron-device-plugin.devicePlugin.enabled
42-
- name: health-monitoring-agent
43-
version: "0.1.0"
44-
repository: "file://charts/health-monitoring-agent"
45-
condition: health_monitoring_agent.enabled
4642
- name: mpi-operator
4743
version: "0.1.0"
4844
repository: "file://charts/mpi-operator"

helm_chart/HyperPodHelmChartForRIG/values.yaml

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,32 +17,20 @@ coredns:
1717
- key: CriticalAddonsOnly
1818
operator: Exists
1919
- effect: NoSchedule
20-
operator: Exists
20+
operator: Equal
2121
key: "sagemaker.amazonaws.com/RestrictedNode"
2222
value: "Worker"
2323
nodeSelector:
2424
"sagemaker.amazonaws.com/instance-group-type": "Restricted"
2525

26-
health-monitoring-agent:
27-
tolerations:
28-
- effect: NoSchedule
29-
operator: Exists
30-
- effect: NoExecute
31-
operator: Exists
32-
- effect: NoSchedule
33-
operator: Exists
34-
key: "sagemaker.amazonaws.com/RestrictedNode"
35-
value: "Worker"
36-
nodeSelector: {}
37-
3826
mpi-operator:
3927
tolerations:
4028
- key: "sagemaker.amazonaws.com/node-health-status"
4129
operator: "Equal"
4230
value: "Unschedulable"
4331
effect: "NoSchedule"
4432
- effect: NoSchedule
45-
operator: Exists
33+
operator: Equal
4634
key: "sagemaker.amazonaws.com/RestrictedNode"
4735
value: "Worker"
4836
nodeSelector:
@@ -60,15 +48,15 @@ neuron-device-plugin:
6048
value: Unschedulable
6149
effect: NoSchedule
6250
- effect: NoSchedule
63-
operator: Exists
51+
operator: Equal
6452
key: "sagemaker.amazonaws.com/RestrictedNode"
6553
value: "Worker"
6654
nodeSelector: {}
6755

6856
training-operators:
6957
tolerations:
7058
- effect: NoSchedule
71-
operator: Exists
59+
operator: Equal
7260
key: "sagemaker.amazonaws.com/RestrictedNode"
7361
value: "Worker"
7462
nodeSelector:
@@ -142,7 +130,7 @@ aws-efa-k8s-device-plugin:
142130
value: "Unschedulable"
143131
effect: "NoSchedule"
144132
- effect: NoSchedule
145-
operator: Exists
133+
operator: Equal
146134
key: "sagemaker.amazonaws.com/RestrictedNode"
147135
value: "Worker"
148136
nodeSelector: {}
@@ -200,7 +188,7 @@ nvidia-device-plugin:
200188
value: Unschedulable
201189
effect: NoSchedule
202190
- effect: NoSchedule
203-
operator: Exists
191+
operator: Equal
204192
key: "sagemaker.amazonaws.com/RestrictedNode"
205193
value: "Worker"
206194
nodeSelector: {}

helm_chart/install_rig_dependencies.sh

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ add_ons=(
88
"eks,kube-system,coredns"
99
"hp,kube-system,mpi-operator"
1010
"hp,kube-system,neuron-device-plugin"
11-
"hp,kube-system,health-monitoring-agent"
1211
"hp,kube-system,training-operators"
1312
)
1413

@@ -30,6 +29,7 @@ fetch_yaml_and_enable_overrides() {
3029
if [ "$scope" = "eks" ]; then
3130
kubectl get deployment $name -n $namespace -o yaml | \
3231
yq 'select(.kind == "Deployment" or .kind == "DaemonSet")' - | yq e "
32+
.metadata.name = \"rig-\" + .metadata.name |
3333
.spec.template.spec.nodeSelector = \"NODESELECTORS\" |
3434
.spec.template.spec.tolerations = \"TOLERATIONS\"
3535
" - | \
@@ -48,8 +48,9 @@ EOF
4848

4949

5050
else
51-
helm template $name $SRC_DIR/charts/$name -f $SRC_DIR/values.yaml -f $SRC_DIR/charts/$name/values.yaml --debug | \
51+
helm template dependencies $SRC_DIR/charts/$name -f $SRC_DIR/values.yaml -f $SRC_DIR/charts/$name/values.yaml --debug | \
5252
yq 'select(.kind == "Deployment" or .kind == "DaemonSet")' - | yq e "
53+
.metadata.name = \"rig-\" + .metadata.name |
5354
.spec.template.spec.nodeSelector = \"NODESELECTORS\" |
5455
.spec.template.spec.tolerations = \"TOLERATIONS\"
5556
" - | \
@@ -66,7 +67,18 @@ if ! command -v yq &> /dev/null; then
6667
fi
6768
fetch_yaml_and_enable_overrides add_ons[@]
6869
helm dependencies update ./HyperPodHelmChartForRIG # This needs to be run after any dependency template change before "helm <template | install>"
69-
helm template rig-dependencies ./HyperPodHelmChartForRIG --namespace kube-system -f ./HyperPodHelmChartForRIG/values.yaml
70+
helm template rig-dependencies ./HyperPodHelmChartForRIG --namespace kube-system -f ./HyperPodHelmChartForRIG/values.yaml > rig-dependencies.yaml
71+
cat rig-dependencies.yaml
72+
echo
73+
read -p "🚀 Do you want to install this Helm chart? [y/N]: " confirm
74+
75+
if [[ "$confirm" =~ ^[Yy]$ ]]; then
76+
echo "🔧 Installing Helm chart..."
77+
helm install rig-dependencies ./HyperPodHelmChartForRIG --namespace kube-system -f ./HyperPodHelmChartForRIG/values.yaml
78+
else
79+
echo "❌ Installation cancelled."
80+
fi
81+
7082
echo "Templates generated in $OUTPUT_DIR"
7183
echo ""
7284
echo ""

0 commit comments

Comments
 (0)