Skip to content

Commit 1cac45e

Browse files
committed
Enable status-exporter in setup and values configuration
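- Updated setup.sh to load the status-exporter image into the kind cluster and to pass its image tag to the chart; the script now also waits for the status-exporter pod to become Ready.
- Added the node label required by the status-exporter DaemonSet (nvidia.com/gpu.deploy.dcgm-exporter=true) in the setup script.
- Modified values.yaml to enable the status-exporter and set its image pull policy to Never.
- Note: the same change also adds kwok-dra-plugin to the image-loading loop and to the image-tag overrides in setup.sh.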
1 parent 58c612b commit 1cac45e

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

test/integration/setup.sh

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ if [[ "${SKIP_SETUP}" != "true" ]]; then
6161

6262
echo "Loading images into kind cluster..."
6363
DOCKER_REPO_BASE="${DOCKER_REPO_BASE:-ghcr.io/run-ai/fake-gpu-operator}"
64-
for component in dra-plugin-gpu status-updater topology-server; do
64+
for component in dra-plugin-gpu status-updater status-exporter topology-server kwok-dra-plugin; do
6565
IMAGE="${DOCKER_REPO_BASE}/${component}:${DOCKER_TAG}"
6666
echo "Loading ${IMAGE}..."
6767
if [[ "${CONTAINER_TOOL}" == "podman" ]]; then
@@ -90,6 +90,8 @@ if [[ "${SKIP_SETUP}" != "true" ]]; then
9090
kubectl label node "${NODE}" nvidia.com/gpu.deploy.dra-plugin-gpu=true --overwrite
9191
# Label for status-updater topology (node pool name)
9292
kubectl label node "${NODE}" run.ai/simulated-gpu-node-pool=default --overwrite
93+
# Label for status-exporter DaemonSet
94+
kubectl label node "${NODE}" nvidia.com/gpu.deploy.dcgm-exporter=true --overwrite
9395
done
9496

9597
# Store worker node name for later reference
@@ -106,11 +108,16 @@ if [[ "${SKIP_SETUP}" != "true" ]]; then
106108
-f "${SCRIPTS_DIR}/values.yaml" \
107109
--set draPlugin.image.tag="${DOCKER_TAG}" \
108110
--set statusUpdater.image.tag="${DOCKER_TAG}" \
109-
--set topologyServer.image.tag="${DOCKER_TAG}"
111+
--set statusExporter.image.tag="${DOCKER_TAG}" \
112+
--set topologyServer.image.tag="${DOCKER_TAG}" \
113+
--set kwokDraPlugin.image.tag="${DOCKER_TAG}"
110114

111115
echo "Waiting for status-updater pod to be ready..."
112116
kubectl wait --for=condition=Ready pod -l app=status-updater -n gpu-operator --timeout=120s
113117

118+
echo "Waiting for status-exporter pod to be ready..."
119+
kubectl wait --for=condition=Ready pod -l app=nvidia-dcgm-exporter -n gpu-operator --timeout=120s
120+
114121
echo "Waiting for topology-server pod to be ready..."
115122
kubectl wait --for=condition=Ready pod -l app=topology-server -n gpu-operator --timeout=120s
116123

test/integration/values.yaml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,9 @@ devicePlugin:
3232
enabled: false
3333

3434
statusExporter:
35-
enabled: false
35+
enabled: true
36+
image:
37+
pullPolicy: Never
3638

3739
kwokGpuDevicePlugin:
3840
enabled: false

0 commit comments

Comments
 (0)