|
1 | 1 | #!/usr/bin/env python3 |
| 2 | +# /* |
| 3 | +# Copyright 2026 The Grove Authors. |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | +# you may not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | +# */ |
| 17 | + |
2 | 18 | """setup_autoMNNVL_cluster.py - Set up a k3d cluster for autoMNNVL e2e testing. |
3 | 19 |
|
4 | 20 | This script handles cluster creation, fake GPU operator installation, |
|
13 | 29 | --without-fake-gpu Skip fake GPU operator installation |
14 | 30 | --mnnvl-enabled Enable MNNVL feature in Grove (default) |
15 | 31 | --mnnvl-disabled Disable MNNVL feature in Grove |
16 | | - --build Build images with docker (default) |
17 | | - --skip-build Skip image build, use existing local images |
| 32 | + --build Build images with skaffold/ko (default) |
| 33 | + --skip-build Skip image build, reuse images already in registry |
18 | 34 | --skip-operator-wait Don't wait for operator pod readiness |
19 | 35 | --image <tag> Use existing image with specified tag |
20 | 36 | --help Show this help message |
|
27 | 43 | import subprocess |
28 | 44 | import sys |
29 | 45 | import time |
| 46 | +from datetime import datetime, timezone |
30 | 47 | from pathlib import Path |
31 | 48 |
|
32 | 49 | # --------------------------------------------------------------------------- |
@@ -188,33 +205,44 @@ def uninstall_fake_gpu_operator() -> None: |
188 | 205 | # --------------------------------------------------------------------------- |
189 | 206 | # Build / push images |
190 | 207 | # --------------------------------------------------------------------------- |
191 | | -def build_images() -> None: |
192 | | - log_info("Building Grove operator images...") |
| 208 | +def build_and_push_images() -> None: |
| 209 | + """Build Grove images with skaffold/ko and push to the local k3d registry. |
| 210 | +
|
| 211 | + Uses the same approach as the main e2e cluster setup (create-e2e-cluster.py): |
| 212 | + skaffold build with the ko builder compiles Go binaries into OCI images and |
| 213 | + pushes them directly to the registry -- no Docker build required. The alpine image for test workloads is then pulled, re-tagged and pushed with the Docker CLI. |
| 214 | + """ |
| 215 | + log_info("Building and pushing Grove operator images with skaffold...") |
193 | 216 | run("./hack/prepare-charts.sh", cwd=OPERATOR_DIR) |
| 217 | + |
| 218 | + push_repo = f"localhost:{REGISTRY_PORT}" |
| 219 | + build_date = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") |
| 220 | + env = { |
| 221 | + "VERSION": IMAGE_TAG, |
| 222 | + "LD_FLAGS": ( |
| 223 | + "-X github.com/ai-dynamo/grove/operator/internal/version.gitCommit=e2e-test-commit " |
| 224 | + "-X github.com/ai-dynamo/grove/operator/internal/version.gitTreeState=clean " |
| 225 | + f"-X github.com/ai-dynamo/grove/operator/internal/version.buildDate={build_date} " |
| 226 | + "-X github.com/ai-dynamo/grove/operator/internal/version.gitVersion=mnnvl-e2e" |
| 227 | + ), |
| 228 | + } |
| 229 | + |
| 230 | + log_info(f"Running skaffold build (push to {push_repo})...") |
194 | 231 | run( |
195 | | - "./hack/docker-build.sh", |
| 232 | + f"skaffold build --default-repo {push_repo}", |
196 | 233 | cwd=OPERATOR_DIR, |
197 | | - env={ |
198 | | - "GOARCH": "amd64", |
199 | | - "PLATFORM": "linux/amd64", |
200 | | - "DOCKER_BUILD_ADDITIONAL_ARGS": "--load", |
201 | | - }, |
| 234 | + env=env, |
202 | 235 | ) |
203 | 236 |
|
| 237 | + log_success("Grove images built and pushed to registry") |
204 | 238 |
|
205 | | -def push_images_to_registry() -> None: |
206 | | - log_info("Pushing images to local registry...") |
207 | | - |
208 | | - run(f"docker tag grove-operator:latest localhost:{REGISTRY_PORT}/grove-operator:{IMAGE_TAG}") |
209 | | - run(f"docker tag grove-initc:latest localhost:{REGISTRY_PORT}/grove-initc:{IMAGE_TAG}") |
210 | | - run(f"docker push localhost:{REGISTRY_PORT}/grove-operator:{IMAGE_TAG}") |
211 | | - run(f"docker push localhost:{REGISTRY_PORT}/grove-initc:{IMAGE_TAG}") |
212 | | - |
213 | | - # Also push alpine for test workloads |
| 239 | + # Push alpine for test workloads (simple pull/tag/push, not a build) |
| 240 | + log_info("Pushing alpine image for test workloads...") |
| 241 | + run("docker pull alpine:latest", check=False) |
214 | 242 | run(f"docker tag alpine:latest localhost:{REGISTRY_PORT}/alpine:latest") |
215 | 243 | run(f"docker push localhost:{REGISTRY_PORT}/alpine:latest") |
216 | 244 |
|
217 | | - log_success("Images pushed to registry") |
| 245 | + log_success("All images pushed to registry") |
218 | 246 |
|
219 | 247 |
|
220 | 248 | # --------------------------------------------------------------------------- |
@@ -320,9 +348,9 @@ def parse_args() -> argparse.Namespace: |
320 | 348 |
|
321 | 349 | build_group = parser.add_mutually_exclusive_group() |
322 | 350 | build_group.add_argument("--build", action="store_true", default=None, |
323 | | - help="Build images with docker (default)") |
| 351 | + help="Build images with skaffold/ko (default)") |
324 | 352 | build_group.add_argument("--skip-build", action="store_true", default=None, |
325 | | - help="Skip image build, use existing local images") |
| 353 | + help="Skip image build, reuse images already in registry") |
326 | 354 | build_group.add_argument("--image", metavar="TAG", default=None, |
327 | 355 | help="Use existing image with specified tag") |
328 | 356 |
|
@@ -424,19 +452,12 @@ def main() -> None: |
424 | 452 | else: |
425 | 453 | uninstall_fake_gpu_operator() |
426 | 454 |
|
427 | | - # Step 3: Build images (if requested) |
| 455 | + # Step 3: Build and push images with skaffold/ko |
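| 456 | + # Skaffold builds Go binaries with ko and pushes them directly to the k3d registry; when the build is skipped this step pushes nothing, so the registry must already contain the images. |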
428 | 457 | if cfg["build_images"]: |
429 | | - build_images() |
430 | | - else: |
431 | | - log_warning("Skipping image build (using existing images)") |
432 | | - |
433 | | - # Step 3b: Push images to registry |
434 | | - # Always push when a new cluster was created (fresh registry has no images). |
435 | | - # When reusing an existing cluster, push only if we just built new images. |
436 | | - if not cfg["skip_cluster_create"] or cfg["build_images"]: |
437 | | - push_images_to_registry() |
| 458 | + build_and_push_images() |
438 | 459 | else: |
439 | | - log_info("Skipping image push (reusing existing cluster with existing images)") |
| 460 | + log_warning("Skipping image build (images already in registry)") |
440 | 461 |
|
441 | 462 | # Step 4: Install Grove operator |
442 | 463 | install_grove_operator( |
|
0 commit comments