Skip to content

Commit 203941a

Browse files
replace component image vars with Helm values file
Replace individual component image environment variables with a values override file approach. Add helper scripts for generating values files from env vars and component images. Ensure clean separation between Helm values file (-f) and --set flags to avoid conflicts. This reduces workflow boilerplate by ~58% and makes adding new operands trivial (no workflow changes needed). Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent 31be0c7 commit 203941a

File tree

6 files changed

+332
-122
lines changed

6 files changed

+332
-122
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright 2024 NVIDIA CORPORATION
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
set -euo pipefail

# Usage: generate-values-overrides.sh OUTPUT_FILE TOOLKIT_IMAGE DEVICE_PLUGIN_IMAGE MIG_MANAGER_IMAGE
#
# Generates a Helm values override file for GPU Operator component images.
# This file can be used with `helm install -f values-overrides.yaml` to
# override default component image versions.
#
# Arguments:
#   OUTPUT_FILE          path of the YAML file to (over)write
#   TOOLKIT_IMAGE        full container-toolkit image reference
#   DEVICE_PLUGIN_IMAGE  full device-plugin image reference
#   MIG_MANAGER_IMAGE    full mig-manager image reference
#
# Outputs:
#   Writes OUTPUT_FILE, then prints a summary and the file contents to stdout.
#   Diagnostics go to stderr.
#
# Exit status:
#   0 on success; 1 on a wrong argument count or an unusable image reference.

# Print the usage text to stderr and exit with status 1.
usage() {
  echo "Usage: $0 OUTPUT_FILE TOOLKIT_IMAGE DEVICE_PLUGIN_IMAGE MIG_MANAGER_IMAGE" >&2
  echo "" >&2
  echo "Example:" >&2
  echo " $0 values.yaml \\" >&2
  echo " ghcr.io/nvidia/container-toolkit:v1.18.0-ubuntu20.04 \\" >&2
  echo " ghcr.io/nvidia/k8s-device-plugin:v0.17.0-ubi8 \\" >&2
  echo " ghcr.io/nvidia/k8s-mig-manager:v0.10.0-ubuntu20.04" >&2
  exit 1
}

# Abort unless an image reference is safe to embed in a double-quoted YAML
# scalar.
# Arguments: $1 - human-readable component name, $2 - image reference
require_image() {
  local label=$1
  local value=$2

  # An empty value would be written out as `image: ""` without any error
  # here, so fail early instead of producing an override that overrides
  # nothing useful.
  if [[ -z "${value}" ]]; then
    echo "Error: ${label} image must not be empty" >&2
    exit 1
  fi

  # Whitespace, double quotes and backslashes would corrupt the quoted YAML
  # scalar emitted below.
  case "${value}" in
    *[[:space:]]* | *'"'* | *'\'*)
      echo "Error: ${label} image contains invalid characters: ${value}" >&2
      exit 1
      ;;
  esac
}

if [[ $# -ne 4 ]]; then
  usage
fi

OUTPUT_FILE="$1"
TOOLKIT_IMAGE="$2"
DEVICE_PLUGIN_IMAGE="$3"
MIG_MANAGER_IMAGE="$4"

require_image "Container Toolkit" "${TOOLKIT_IMAGE}"
require_image "Device Plugin" "${DEVICE_PLUGIN_IMAGE}"
require_image "MIG Manager" "${MIG_MANAGER_IMAGE}"

# Generate values override file.
# The per-component keys must be indented beneath their component so that YAML
# nests them; at column 0 they would become repeated top-level keys.
# NOTE(review): repository/version are blanked presumably so the chart treats
# `image` as a complete reference - confirm against the chart's image helpers.
cat > "${OUTPUT_FILE}" <<EOF
# Generated by generate-values-overrides.sh
# Date: $(date -u +"%Y-%m-%d %H:%M:%S UTC")
#
# This file overrides default GPU Operator component images with
# specific versions for forward compatibility testing.

toolkit:
  repository: ""
  version: ""
  image: "${TOOLKIT_IMAGE}"

devicePlugin:
  repository: ""
  version: ""
  image: "${DEVICE_PLUGIN_IMAGE}"

migManager:
  repository: ""
  version: ""
  image: "${MIG_MANAGER_IMAGE}"
EOF

# Echo the result so the generated configuration is visible in the job log.
echo "Generated values override file: ${OUTPUT_FILE}"
echo ""
echo "=== Component Images ==="
echo "Container Toolkit: ${TOOLKIT_IMAGE}"
echo "Device Plugin: ${DEVICE_PLUGIN_IMAGE}"
echo "MIG Manager: ${MIG_MANAGER_IMAGE}"
echo ""
echo "=== File Contents ==="
cat "${OUTPUT_FILE}"
74+

.github/workflows/e2e-tests.yaml

Lines changed: 22 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -28,18 +28,11 @@ on:
2828
operator_version:
2929
required: true
3030
type: string
31-
toolkit_image:
31+
use_values_override:
3232
required: false
33-
type: string
34-
description: 'Full container-toolkit image path (e.g., ghcr.io/nvidia/container-toolkit:v1.18.0)'
35-
device_plugin_image:
36-
required: false
37-
type: string
38-
description: 'Full device-plugin image path'
39-
mig_manager_image:
40-
required: false
41-
type: string
42-
description: 'Full mig-manager image path'
33+
type: boolean
34+
default: false
35+
description: 'Use values-overrides artifact for component image configuration'
4336
secrets:
4437
AWS_ACCESS_KEY_ID:
4538
required: true
@@ -61,28 +54,13 @@ on:
6154
description: 'Operator version to test (override)'
6255
required: false
6356
type: string
64-
toolkit_image:
65-
description: 'Override container-toolkit image'
66-
required: false
67-
type: string
68-
device_plugin_image:
69-
description: 'Override device-plugin image'
70-
required: false
71-
type: string
72-
mig_manager_image:
73-
description: 'Override mig-manager image'
74-
required: false
75-
type: string
7657

7758
jobs:
7859
variables:
7960
uses: ./.github/workflows/variables.yaml
8061
with:
8162
operator_image: ${{ inputs.operator_image }}
8263
operator_version: ${{ inputs.operator_version }}
83-
toolkit_image: ${{ inputs.toolkit_image }}
84-
device_plugin_image: ${{ inputs.device_plugin_image }}
85-
mig_manager_image: ${{ inputs.mig_manager_image }}
8664

8765
e2e-tests-containerd:
8866
needs: [variables]
@@ -93,6 +71,12 @@ jobs:
9371
steps:
9472
- uses: actions/checkout@v5
9573
name: Check out code
74+
- name: Download values override file
75+
if: ${{ inputs.use_values_override }}
76+
uses: actions/download-artifact@v5
77+
with:
78+
name: values-overrides
79+
path: ${{ github.workspace }}
9680
- name: Set up Holodeck
9781
uses: NVIDIA/[email protected]
9882
with:
@@ -109,13 +93,13 @@ jobs:
10993
run: |
11094
echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV
11195
echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
96+
if [[ "${{ inputs.use_values_override }}" == "true" ]]; then
97+
echo "VALUES_FILE=${{ github.workspace }}/values-overrides.yaml" >> $GITHUB_ENV
98+
fi
11299
- name: Run e2e tests
113100
env:
114101
OPERATOR_VERSION: ${{ needs.variables.outputs.operator_version }}
115102
OPERATOR_IMAGE: ${{ needs.variables.outputs.operator_image }}
116-
TOOLKIT_CONTAINER_IMAGE: ${{ needs.variables.outputs.toolkit_image }}
117-
DEVICE_PLUGIN_IMAGE: ${{ needs.variables.outputs.device_plugin_image }}
118-
MIG_MANAGER_IMAGE: ${{ needs.variables.outputs.mig_manager_image }}
119103
GPU_PRODUCT_NAME: "Tesla-T4"
120104
SKIP_LAUNCH: "true"
121105
CONTAINER_RUNTIME: "containerd"
@@ -142,6 +126,12 @@ jobs:
142126
steps:
143127
- uses: actions/checkout@v5
144128
name: Check out code
129+
- name: Download values override file
130+
if: ${{ inputs.use_values_override }}
131+
uses: actions/download-artifact@v5
132+
with:
133+
name: values-overrides
134+
path: ${{ github.workspace }}
145135
- name: Set up Holodeck
146136
uses: NVIDIA/[email protected]
147137
with:
@@ -158,13 +148,13 @@ jobs:
158148
run: |
159149
echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV
160150
echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
151+
if [[ "${{ inputs.use_values_override }}" == "true" ]]; then
152+
echo "VALUES_FILE=${{ github.workspace }}/values-overrides.yaml" >> $GITHUB_ENV
153+
fi
161154
- name: Run e2e tests
162155
env:
163156
OPERATOR_VERSION: ${{ needs.variables.outputs.operator_version }}
164157
OPERATOR_IMAGE: ${{ needs.variables.outputs.operator_image }}
165-
TOOLKIT_CONTAINER_IMAGE: ${{ needs.variables.outputs.toolkit_image }}
166-
DEVICE_PLUGIN_IMAGE: ${{ needs.variables.outputs.device_plugin_image }}
167-
MIG_MANAGER_IMAGE: ${{ needs.variables.outputs.mig_manager_image }}
168158
GPU_PRODUCT_NAME: "Tesla-T4"
169159
SKIP_LAUNCH: "true"
170160
CONTAINER_RUNTIME: "containerd"

.github/workflows/forward-compatibility.yaml

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,6 @@ concurrency:
2626
jobs:
2727
fetch-latest-images:
2828
runs-on: ubuntu-latest
29-
outputs:
30-
toolkit_image: ${{ steps.images.outputs.toolkit_image }}
31-
device_plugin_image: ${{ steps.images.outputs.device_plugin_image }}
32-
mig_manager_image: ${{ steps.images.outputs.mig_manager_image }}
3329
steps:
3430
- uses: actions/checkout@v5
3531

@@ -41,38 +37,41 @@ jobs:
4137
chmod +x bin/regctl
4238
echo "$(pwd)/bin" >> $GITHUB_PATH
4339
44-
- name: Get latest component images
45-
id: images
40+
- name: Get latest component images and generate values override file
4641
env:
4742
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
4843
run: |
4944
# Fetch latest images from component repositories
5045
echo "::notice::Fetching latest container-toolkit image..."
5146
TOOLKIT=$(.github/scripts/get-latest-images.sh toolkit)
52-
echo "toolkit_image=${TOOLKIT}" >> $GITHUB_OUTPUT
5347
5448
echo "::notice::Fetching latest device-plugin image..."
5549
DEVICE_PLUGIN=$(.github/scripts/get-latest-images.sh device-plugin)
56-
echo "device_plugin_image=${DEVICE_PLUGIN}" >> $GITHUB_OUTPUT
5750
5851
echo "::notice::Fetching latest mig-manager image..."
5952
MIG_MANAGER=$(.github/scripts/get-latest-images.sh mig-manager)
60-
echo "mig_manager_image=${MIG_MANAGER}" >> $GITHUB_OUTPUT
6153
62-
echo "::notice::=== Forward Compatibility Test Configuration ==="
63-
echo "::notice::Container Toolkit: ${TOOLKIT}"
64-
echo "::notice::Device Plugin: ${DEVICE_PLUGIN}"
65-
echo "::notice::MIG Manager: ${MIG_MANAGER}"
54+
# Generate values override file
55+
.github/scripts/generate-values-overrides.sh \
56+
values-overrides.yaml \
57+
"${TOOLKIT}" \
58+
"${DEVICE_PLUGIN}" \
59+
"${MIG_MANAGER}"
60+
61+
- name: Upload values override file
62+
uses: actions/upload-artifact@v5
63+
with:
64+
name: values-overrides
65+
path: values-overrides.yaml
66+
retention-days: 30
6667

6768
run-e2e-tests:
6869
needs: [fetch-latest-images]
6970
uses: ./.github/workflows/e2e-tests.yaml
7071
with:
7172
operator_image: ghcr.io/nvidia/gpu-operator
7273
operator_version: main-latest
73-
toolkit_image: ${{ needs.fetch-latest-images.outputs.toolkit_image }}
74-
device_plugin_image: ${{ needs.fetch-latest-images.outputs.device_plugin_image }}
75-
mig_manager_image: ${{ needs.fetch-latest-images.outputs.mig_manager_image }}
74+
use_values_override: true
7675
secrets: inherit
7776

7877
notify-failure:
@@ -95,9 +94,7 @@ jobs:
9594
*Trigger:* ${{ github.event_name }}
9695
9796
*Tested Components:*
98-
• Container Toolkit: `${{ needs.fetch-latest-images.outputs.toolkit_image }}`
99-
• Device Plugin: `${{ needs.fetch-latest-images.outputs.device_plugin_image }}`
100-
• MIG Manager: `${{ needs.fetch-latest-images.outputs.mig_manager_image }}`
97+
Download `values-overrides` artifact to see tested component versions
10198
10299
*Details:* <https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Failed Run>
103100
<@S095E7BNGJU>

.github/workflows/variables.yaml

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,6 @@ on:
2323
description: 'Operator version to use (optional override)'
2424
required: false
2525
type: string
26-
toolkit_image:
27-
description: 'Full container-toolkit image path (optional)'
28-
required: false
29-
type: string
30-
device_plugin_image:
31-
description: 'Full device-plugin image path (optional)'
32-
required: false
33-
type: string
34-
mig_manager_image:
35-
description: 'Full mig-manager image path (optional)'
36-
required: false
37-
type: string
3826
outputs:
3927
commit_short_sha:
4028
description: "The short SHA to use as a version string"
@@ -57,15 +45,6 @@ on:
5745
operator_image:
5846
description: "The operator image (with override support)"
5947
value: ${{ jobs.variables.outputs.operator_image }}
60-
toolkit_image:
61-
description: "The container-toolkit image override"
62-
value: ${{ jobs.variables.outputs.toolkit_image }}
63-
device_plugin_image:
64-
description: "The device-plugin image override"
65-
value: ${{ jobs.variables.outputs.device_plugin_image }}
66-
mig_manager_image:
67-
description: "The mig-manager image override"
68-
value: ${{ jobs.variables.outputs.mig_manager_image }}
6948

7049
jobs:
7150
variables:
@@ -78,9 +57,6 @@ jobs:
7857
operator_image_base: ${{ steps.vars.outputs.operator_image_base }}
7958
operator_version: ${{ steps.vars.outputs.operator_version }}
8059
operator_image: ${{ steps.vars.outputs.operator_image }}
81-
toolkit_image: ${{ steps.vars.outputs.toolkit_image }}
82-
device_plugin_image: ${{ steps.vars.outputs.device_plugin_image }}
83-
mig_manager_image: ${{ steps.vars.outputs.mig_manager_image }}
8460
steps:
8561
- name: Checkout code
8662
uses: actions/checkout@v5
@@ -124,11 +100,6 @@ jobs:
124100
OPERATOR_IMAGE="${OPERATOR_IMAGE_BASE}"
125101
fi
126102
127-
# Component images (optional overrides)
128-
TOOLKIT_IMAGE="${{ inputs.toolkit_image }}"
129-
DEVICE_PLUGIN_IMAGE="${{ inputs.device_plugin_image }}"
130-
MIG_MANAGER_IMAGE="${{ inputs.mig_manager_image }}"
131-
132103
# Output all variables
133104
echo "commit_short_sha=${COMMIT_SHORT_SHA}" >> $GITHUB_OUTPUT
134105
echo "repo_full_name=${REPO_FULL_NAME}" >> $GITHUB_OUTPUT
@@ -137,21 +108,9 @@ jobs:
137108
echo "operator_image_base=${OPERATOR_IMAGE_BASE}" >> $GITHUB_OUTPUT
138109
echo "operator_version=${OPERATOR_VERSION}" >> $GITHUB_OUTPUT
139110
echo "operator_image=${OPERATOR_IMAGE}" >> $GITHUB_OUTPUT
140-
echo "toolkit_image=${TOOLKIT_IMAGE}" >> $GITHUB_OUTPUT
141-
echo "device_plugin_image=${DEVICE_PLUGIN_IMAGE}" >> $GITHUB_OUTPUT
142-
echo "mig_manager_image=${MIG_MANAGER_IMAGE}" >> $GITHUB_OUTPUT
143111
144112
# Display for debugging
145113
echo "::notice::Commit SHA: ${COMMIT_SHORT_SHA}"
146114
echo "::notice::Push on build: ${PUSH_ON_BUILD}"
147115
echo "::notice::Operator image: ${OPERATOR_IMAGE}:${OPERATOR_VERSION}"
148-
if [[ -n "${TOOLKIT_IMAGE}" ]]; then
149-
echo "::notice::Using custom toolkit: ${TOOLKIT_IMAGE}"
150-
fi
151-
if [[ -n "${DEVICE_PLUGIN_IMAGE}" ]]; then
152-
echo "::notice::Using custom device-plugin: ${DEVICE_PLUGIN_IMAGE}"
153-
fi
154-
if [[ -n "${MIG_MANAGER_IMAGE}" ]]; then
155-
echo "::notice::Using custom mig-manager: ${MIG_MANAGER_IMAGE}"
156-
fi
157116

0 commit comments

Comments
 (0)