Skip to content

Commit c97824d

Browse files
[ci] Add RayImagePushContext for publishing wanda Ray images
Adds a new script, `push_ray_image.py`, for publishing Wanda-cached Ray images to Docker Hub. The focus is on replicating the tagging logic from ci/ray_ci/docker_container.py. Many test cases were added to replicate the existing publishing cases, but it would be good to hear whether any others would be helpful. Topic: push-script Signed-off-by: andrew <andrew@anyscale.com>
1 parent cb6188d commit c97824d

File tree

3 files changed

+665
-0
lines changed

3 files changed

+665
-0
lines changed

ci/ray_ci/automation/BUILD.bazel

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,3 +316,29 @@ py_binary(
316316
ci_require("click"),
317317
],
318318
)
319+
320+
# Binary target for the push_ray_image.py script; depends on the crane helper
# library, the shared ray_ci library and click.
py_binary(
    name = "push_ray_image",
    srcs = ["push_ray_image.py"],
    exec_compatible_with = ["//bazel:py3"],
    deps = [
        ":crane_lib",
        "//ci/ray_ci:ray_ci_lib",
        ci_require("click"),
    ],
)
330+
331+
# Small pytest target for push_ray_image, tagged as a CI unit test owned by
# team:ci.
py_test(
    name = "test_push_ray_image",
    size = "small",
    srcs = ["test_push_ray_image.py"],
    exec_compatible_with = ["//bazel:py3"],
    tags = [
        "ci_unit",
        "team:ci",
    ],
    deps = [
        ":push_ray_image",
        ci_require("pytest"),
    ],
)
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
import logging
2+
import sys
3+
from datetime import datetime
4+
from typing import List
5+
6+
import click
7+
8+
from ci.ray_ci.automation.crane_lib import (
9+
call_crane_copy,
10+
call_crane_manifest,
11+
)
12+
from ci.ray_ci.configs import (
13+
ARCHITECTURE,
14+
DEFAULT_ARCHITECTURE,
15+
DEFAULT_PYTHON_TAG_VERSION,
16+
PYTHON_VERSIONS,
17+
)
18+
from ci.ray_ci.docker_container import (
19+
ARCHITECTURES_RAY,
20+
ARCHITECTURES_RAY_LLM,
21+
ARCHITECTURES_RAY_ML,
22+
GPU_PLATFORM,
23+
PLATFORMS_RAY,
24+
PLATFORMS_RAY_LLM,
25+
PLATFORMS_RAY_ML,
26+
PYTHON_VERSIONS_RAY,
27+
PYTHON_VERSIONS_RAY_LLM,
28+
PYTHON_VERSIONS_RAY_ML,
29+
RAY_REPO_MAP,
30+
RayType,
31+
)
32+
from ci.ray_ci.utils import ecr_docker_login
33+
34+
# String values of every RayType member; used as the allowed choices for the
# --image-type command-line option.
VALID_IMAGE_TYPES = [rt.value for rt in RayType]

# Emit bare messages (no level/timestamp prefix) at INFO and above to stdout.
logging.basicConfig(
    level=logging.INFO,
    format="%(message)s",
    stream=sys.stdout,
)
logger = logging.getLogger(__name__)
42+
43+
44+
class PushRayImageError(Exception):
    """Signals that validating or pushing a ray image did not succeed."""
46+
47+
48+
def compact_cuda_suffix(platform: str) -> str:
    """Convert a CUDA platform string to its compact tag suffix.

    The part of ``platform`` before the first ``-`` is split on ``.`` and the
    first two components are concatenated behind a leading dash, e.g.
    ``cu12.1.1-cudnn8`` becomes ``-cu121``.

    Args:
        platform: CUDA platform string such as ``cu12.1.1-cudnn8``.

    Returns:
        The compact suffix, including the leading ``-``.

    Raises:
        PushRayImageError: If there are fewer than two dot-separated
            components before the first ``-``, or if either of the first two
            components is empty.
    """
    base, _, _ = platform.partition("-")
    components = base.split(".")
    # An empty component (e.g. "cu12." or ".1") would otherwise produce a
    # silently truncated tag such as "-cu12", so reject it explicitly.
    if len(components) < 2 or not all(components[:2]):
        raise PushRayImageError(f"Unrecognized GPU platform format: {platform}")

    major, minor = components[:2]
    return f"-{major}{minor}"
56+
57+
58+
class RayImagePushContext:
    """Context for publishing a ray image from Wanda cache to Docker Hub.

    Holds the build parameters of a single image and derives from them the
    Wanda source tag, the Docker Hub repository and the destination tags.

    Args:
        ray_type: Which ray image flavor is being published.
        python_version: Python version of the image, e.g. ``3.10``.
        platform: ``cpu`` or a CUDA platform string.
        architecture: Target CPU architecture.
        branch: Branch the build runs on.
        commit: Full commit SHA; its first six characters are used in tags.
        rayci_schedule: Schedule name; ``nightly`` enables nightly tags on
            ``master``.
        rayci_build_id: Build id; prefixes the Wanda tag and is used as a
            version tag for non-release builds.
        pull_request: ``"false"`` or the pull request number as a string.
    """

    ray_type: RayType
    python_version: str
    platform: str
    architecture: str
    branch: str
    commit: str
    rayci_schedule: str
    rayci_build_id: str
    pull_request: str  # buildkite uses "false" or number string
    # Computed fields (set in __init__)
    arch_suffix: str
    wanda_tag: str
    docker_hub_repo: str

    def __init__(
        self,
        ray_type: RayType,
        python_version: str,
        platform: str,
        architecture: str,
        branch: str,
        commit: str,
        rayci_schedule: str,
        rayci_build_id: str,
        pull_request: str,
    ) -> None:
        self.ray_type = ray_type
        self.python_version = python_version
        self.platform = platform
        self.architecture = architecture
        self.branch = branch
        self.commit = commit
        self.rayci_schedule = rayci_schedule
        self.rayci_build_id = rayci_build_id
        self.pull_request = pull_request

        # The default architecture carries no suffix; others get "-<arch>".
        self.arch_suffix = (
            "" if architecture == DEFAULT_ARCHITECTURE else f"-{architecture}"
        )
        # wanda_image_name() reads arch_suffix, so it must be set first.
        self.wanda_tag = f"{rayci_build_id}-{self.wanda_image_name()}"
        self.docker_hub_repo = f"rayproject/{RAY_REPO_MAP[self.ray_type.value]}"

    def _allowed_values(self) -> tuple:
        """Return (python versions, platforms, architectures) valid for this ray type."""
        if self.ray_type in [RayType.RAY_ML, RayType.RAY_ML_EXTRA]:
            return PYTHON_VERSIONS_RAY_ML, PLATFORMS_RAY_ML, ARCHITECTURES_RAY_ML
        if self.ray_type in [RayType.RAY_LLM, RayType.RAY_LLM_EXTRA]:
            return PYTHON_VERSIONS_RAY_LLM, PLATFORMS_RAY_LLM, ARCHITECTURES_RAY_LLM
        # ray or ray-extra
        return PYTHON_VERSIONS_RAY, PLATFORMS_RAY, ARCHITECTURES_RAY

    def assert_published_image_type(self) -> None:
        """Validate python version, platform and architecture for this ray type.

        Raises:
            PushRayImageError: On the first value (checked in the order python
                version, platform, architecture) that is not in the allowed
                set for ``self.ray_type``.
        """
        python_versions, platforms, architectures = self._allowed_values()
        checks = (
            ("python version", self.python_version, python_versions),
            ("platform", self.platform, platforms),
            ("architecture", self.architecture, architectures),
        )
        for label, value, allowed in checks:
            if value not in allowed:
                raise PushRayImageError(
                    f"Invalid {label} {value} for {self.ray_type}"
                )

    def destination_tags(self) -> List[str]:
        """
        Compute the destination tags for this context.

        Tags are formed as:
        {version}{variation}{python_suffix}{platform}{architecture_suffix}

        For example:
        - nightly.260107.abc123-py310-cpu
        - nightly-extra-py310-cu121
        - nightly.260107.abc123-extra-py310-gpu
        - 2.53.0.abc123-py310-cu121
        - 2.53.0.abc123-extra-py310-cu121
        """
        # These do not depend on the version, so compute them once up front.
        variation = self._variation_suffix()
        platform_suffixes = self._platform_suffixes()
        python_suffixes = self._python_suffixes()

        tags = []
        for version in self._versions():
            for plat in platform_suffixes:
                for py in python_suffixes:
                    tags.append(f"{version}{variation}{py}{plat}{self.arch_suffix}")
        return tags

    def wanda_image_name(self) -> str:
        """Get the wanda source image name for this context."""
        # "cpu" needs no special case: the platform string is used verbatim.
        return (
            f"{self.ray_type.value}-py{self.python_version}"
            f"-{self.platform}{self.arch_suffix}"
        )

    def _versions(self) -> List[str]:
        """Compute version tags based on branch/schedule/PR status.

        Precedence is master, then release branch, then pull request, then
        everything else.
        """
        is_master = self.branch == "master"
        is_nightly = self.rayci_schedule == "nightly"
        is_pull_request = self.pull_request != "false"
        is_release = self.branch and self.branch.startswith("releases/")
        sha_tag = self.commit[:6]
        formatted_date = datetime.now().strftime("%y%m%d")

        if is_master:
            if is_nightly:
                return [f"nightly.{formatted_date}.{sha_tag}", "nightly"]
            return [sha_tag, self.rayci_build_id]
        if is_release:
            release_name = self.branch[len("releases/") :]
            return [f"{release_name}.{sha_tag}"]
        if is_pull_request:
            return [f"pr-{self.pull_request}.{sha_tag}", self.rayci_build_id]
        return [sha_tag, self.rayci_build_id]

    def _variation_suffix(self) -> str:
        """Get -extra suffix for extra image types."""
        extra_types = {
            RayType.RAY_EXTRA,
            RayType.RAY_ML_EXTRA,
            RayType.RAY_LLM_EXTRA,
        }
        return "-extra" if self.ray_type in extra_types else ""

    def _python_suffixes(self) -> List[str]:
        """Get python version suffixes (includes empty for default version)."""
        suffixes = [f"-py{self.python_version.replace('.', '')}"]
        if self.python_version == DEFAULT_PYTHON_TAG_VERSION:
            suffixes.append("")
        return suffixes

    def _platform_suffixes(self) -> List[str]:
        """Get platform suffixes (includes aliases like -gpu for GPU_PLATFORM)."""
        if self.platform == "cpu":
            suffixes = ["-cpu"]
            # no tag is alias to cpu for ray image
            if self.ray_type in {RayType.RAY, RayType.RAY_EXTRA}:
                suffixes.append("")
            return suffixes

        suffixes = [compact_cuda_suffix(self.platform)]
        if self.platform == GPU_PLATFORM:
            # gpu is alias to GPU_PLATFORM value for ray image
            suffixes.append("-gpu")
            # no tag is alias to gpu for ray-ml image
            if self.ray_type in {RayType.RAY_ML, RayType.RAY_ML_EXTRA}:
                suffixes.append("")

        return suffixes
221+
222+
223+
def _image_exists(tag: str) -> bool:
    """Report whether crane can fetch a manifest for ``tag``.

    Returns:
        True when ``call_crane_manifest`` reports exit code 0, else False.
    """
    exit_status, _manifest_output = call_crane_manifest(tag)
    found = exit_status == 0
    return found
227+
228+
229+
def _copy_image(reference: str, destination: str, dry_run: bool = False) -> None:
    """Copy ``reference`` to ``destination`` with crane, or only log it.

    Args:
        reference: Source image reference.
        destination: Destination image reference.
        dry_run: When True, log the intended copy and do nothing else.

    Raises:
        PushRayImageError: If crane returns a non-zero exit code.
    """
    if not dry_run:
        logger.info(f"Copying {reference} -> {destination}")
        exit_status, crane_output = call_crane_copy(reference, destination)
        if exit_status != 0:
            raise PushRayImageError(f"Crane copy failed: {crane_output}")
        logger.info(f"Successfully copied to {destination}")
        return

    logger.info(f"DRY RUN: Would copy {reference} -> {destination}")
240+
241+
242+
@click.command()
@click.option(
    "--python-version", type=click.Choice(list(PYTHON_VERSIONS.keys())), required=True
)
@click.option("--platform", type=click.Choice(list(PLATFORMS_RAY)), required=True)
@click.option(
    "--image-type",
    type=click.Choice(VALID_IMAGE_TYPES),
    required=True,
)
@click.option("--architecture", type=click.Choice(ARCHITECTURE), required=True)
@click.option("--rayci-work-repo", type=str, required=True, envvar="RAYCI_WORK_REPO")
@click.option("--rayci-build-id", type=str, required=True, envvar="RAYCI_BUILD_ID")
@click.option("--branch", type=str, required=True, envvar="BUILDKITE_BRANCH")
@click.option("--commit", type=str, required=True, envvar="BUILDKITE_COMMIT")
@click.option("--rayci-schedule", type=str, default="", envvar="RAYCI_SCHEDULE")
@click.option(
    "--pull-request", type=str, default="false", envvar="BUILDKITE_PULL_REQUEST"
)
@click.option("--upload", is_flag=True, default=False)
def main(
    python_version: str,
    platform: str,
    image_type: str,
    architecture: str,
    rayci_work_repo: str,
    rayci_build_id: str,
    branch: str,
    commit: str,
    rayci_schedule: str,
    pull_request: str,
    upload: bool,
) -> None:
    """
    Publish a Wanda-cached ray image to Docker Hub.

    Tags are generated matching the original RayDockerContainer format:
    {version}{variation}{python_suffix}{platform}{architecture_suffix}

    Without --upload this runs in dry-run mode: the source image is still
    verified, but copies are only logged.

    Raises PushRayImageError if the requested combination is not published,
    the source image is missing from the Wanda cache, or a copy fails.
    """
    dry_run = not upload
    if dry_run:
        logger.info("DRY RUN MODE - no images will be pushed")

    ctx = RayImagePushContext(
        ray_type=RayType(image_type),
        python_version=python_version,
        platform=platform,
        architecture=architecture,
        branch=branch,
        commit=commit,
        rayci_schedule=rayci_schedule,
        rayci_build_id=rayci_build_id,
        pull_request=pull_request,
    )

    ctx.assert_published_image_type()

    # The registry host is the first path component of the work repo.
    ecr_registry = rayci_work_repo.split("/")[0]
    ecr_docker_login(ecr_registry)

    src_ref = f"{rayci_work_repo}:{ctx.wanda_tag}"
    logger.info(f"Verifying source image in Wanda cache: {src_ref}")
    if not _image_exists(src_ref):
        raise PushRayImageError(f"Source image not found in Wanda cache: {src_ref}")

    # Compute the tags exactly once: nightly tags embed the current date, so a
    # second call could yield a list that differs from what was pushed.
    tags = ctx.destination_tags()
    for tag in tags:
        dest_ref = f"{ctx.docker_hub_repo}:{tag}"
        _copy_image(src_ref, dest_ref, dry_run=dry_run)

    if dry_run:
        # Nothing was pushed, so do not report success.
        logger.info(
            f"DRY RUN: Would push {ctx.ray_type.value} image with tags: {tags}"
        )
    else:
        logger.info(
            f"Successfully pushed {ctx.ray_type.value} image with tags: {tags}"
        )
314+
315+
316+
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)