Skip to content

Commit 39320fc

Browse files
chipspeakkryanbeane
authored andcommitted
task(RHOAIENG-33283): Elegantly handle runtime_env
Signed-off-by: Pat O'Connor <[email protected]>
1 parent 05162fb commit 39320fc

File tree

10 files changed

+1437
-769
lines changed

10 files changed

+1437
-769
lines changed

.github/workflows/rayjob_e2e_tests.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ jobs:
115115
kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user
116116
kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward
117117
kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods --user=sdk-user
118+
kubectl create clusterrole configmap-manager --verb=get,list,create,delete,update,patch --resource=configmaps
119+
kubectl create clusterrolebinding sdk-user-configmap-manager --clusterrole=configmap-manager --user=sdk-user
120+
kubectl create clusterrole workload-reader --verb=get,list,watch --resource=workloads
121+
kubectl create clusterrolebinding sdk-user-workload-reader --clusterrole=workload-reader --user=sdk-user
118122
kubectl config use-context sdk-user
119123
120124
- name: Run RayJob E2E tests
@@ -126,7 +130,7 @@ jobs:
126130
pip install poetry
127131
poetry install --with test,docs
128132
echo "Running RayJob e2e tests..."
129-
poetry run pytest -v -s ./tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log 2>&1
133+
poetry run pytest -v -s ./tests/e2e/rayjob/ > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log 2>&1
130134
131135
- name: Switch to kind-cluster context to print logs
132136
if: always() && steps.deploy.outcome == 'success'

src/codeflare_sdk/common/utils/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
"3.11": CUDA_PY311_RUNTIME_IMAGE,
1313
"3.12": CUDA_PY312_RUNTIME_IMAGE,
1414
}
15-
MOUNT_PATH = "/home/ray/scripts"
15+
MOUNT_PATH = "/home/ray/files"
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# Copyright 2025 IBM, Red Hat
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Tests for common/utils/utils.py
17+
"""
18+
19+
import pytest
20+
from collections import namedtuple
21+
from codeflare_sdk.common.utils.utils import (
22+
update_image,
23+
get_ray_image_for_python_version,
24+
)
25+
from codeflare_sdk.common.utils.constants import (
26+
SUPPORTED_PYTHON_VERSIONS,
27+
CUDA_PY311_RUNTIME_IMAGE,
28+
CUDA_PY312_RUNTIME_IMAGE,
29+
)
30+
31+
32+
def test_update_image_with_empty_string_python_311(mocker):
33+
"""Test that update_image() with empty string returns default image for Python 3.11."""
34+
# Mock sys.version_info to simulate Python 3.11
35+
VersionInfo = namedtuple(
36+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
37+
)
38+
mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0))
39+
40+
# Test with empty image (should use default for Python 3.11)
41+
image = update_image("")
42+
assert image == CUDA_PY311_RUNTIME_IMAGE
43+
assert image == SUPPORTED_PYTHON_VERSIONS["3.11"]
44+
45+
46+
def test_update_image_with_empty_string_python_312(mocker):
47+
"""Test that update_image() with empty string returns default image for Python 3.12."""
48+
# Mock sys.version_info to simulate Python 3.12
49+
VersionInfo = namedtuple(
50+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
51+
)
52+
mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0))
53+
54+
# Test with empty image (should use default for Python 3.12)
55+
image = update_image("")
56+
assert image == CUDA_PY312_RUNTIME_IMAGE
57+
assert image == SUPPORTED_PYTHON_VERSIONS["3.12"]
58+
59+
60+
def test_update_image_with_none_python_311(mocker):
61+
"""Test that update_image() with None returns default image for Python 3.11."""
62+
# Mock sys.version_info to simulate Python 3.11
63+
VersionInfo = namedtuple(
64+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
65+
)
66+
mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0))
67+
68+
# Test with None image (should use default for Python 3.11)
69+
image = update_image(None)
70+
assert image == CUDA_PY311_RUNTIME_IMAGE
71+
72+
73+
def test_update_image_with_none_python_312(mocker):
74+
"""Test that update_image() with None returns default image for Python 3.12."""
75+
# Mock sys.version_info to simulate Python 3.12
76+
VersionInfo = namedtuple(
77+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
78+
)
79+
mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0))
80+
81+
# Test with None image (should use default for Python 3.12)
82+
image = update_image(None)
83+
assert image == CUDA_PY312_RUNTIME_IMAGE
84+
85+
86+
def test_update_image_with_unsupported_python_version(mocker):
87+
"""Test update_image() warning for unsupported Python versions."""
88+
# Mock sys.version_info to simulate Python 3.8 (unsupported)
89+
VersionInfo = namedtuple(
90+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
91+
)
92+
mocker.patch("sys.version_info", VersionInfo(3, 8, 0, "final", 0))
93+
94+
# Mock warnings.warn to check if it gets called
95+
warn_mock = mocker.patch("warnings.warn")
96+
97+
# Call update_image with empty image
98+
image = update_image("")
99+
100+
# Assert that the warning was called with the expected message
101+
warn_mock.assert_called_once()
102+
assert "No default Ray image defined for 3.8" in warn_mock.call_args[0][0]
103+
assert "3.11, 3.12" in warn_mock.call_args[0][0]
104+
105+
# Assert that no image was set since the Python version is not supported
106+
assert image is None
107+
108+
109+
def test_update_image_with_provided_custom_image():
110+
"""Test that providing a custom image bypasses auto-detection."""
111+
custom_image = "my-custom-ray:latest"
112+
image = update_image(custom_image)
113+
114+
# Should return the provided image unchanged
115+
assert image == custom_image
116+
117+
118+
def test_update_image_with_provided_image_empty_string():
119+
"""Test update_image() with provided custom image as a non-empty string."""
120+
custom_image = "docker.io/rayproject/ray:2.40.0"
121+
image = update_image(custom_image)
122+
123+
# Should return the provided image unchanged
124+
assert image == custom_image
125+
126+
127+
def test_get_ray_image_for_python_version_explicit_311():
128+
"""Test get_ray_image_for_python_version() with explicit Python 3.11."""
129+
image = get_ray_image_for_python_version("3.11")
130+
assert image == CUDA_PY311_RUNTIME_IMAGE
131+
132+
133+
def test_get_ray_image_for_python_version_explicit_312():
134+
"""Test get_ray_image_for_python_version() with explicit Python 3.12."""
135+
image = get_ray_image_for_python_version("3.12")
136+
assert image == CUDA_PY312_RUNTIME_IMAGE
137+
138+
139+
def test_get_ray_image_for_python_version_auto_detect_311(mocker):
140+
"""Test get_ray_image_for_python_version() auto-detects Python 3.11."""
141+
# Mock sys.version_info to simulate Python 3.11
142+
VersionInfo = namedtuple(
143+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
144+
)
145+
mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0))
146+
147+
# Test with None (should auto-detect)
148+
image = get_ray_image_for_python_version()
149+
assert image == CUDA_PY311_RUNTIME_IMAGE
150+
151+
152+
def test_get_ray_image_for_python_version_auto_detect_312(mocker):
153+
"""Test get_ray_image_for_python_version() auto-detects Python 3.12."""
154+
# Mock sys.version_info to simulate Python 3.12
155+
VersionInfo = namedtuple(
156+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
157+
)
158+
mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0))
159+
160+
# Test with None (should auto-detect)
161+
image = get_ray_image_for_python_version()
162+
assert image == CUDA_PY312_RUNTIME_IMAGE
163+
164+
165+
def test_get_ray_image_for_python_version_unsupported_with_warning(mocker):
166+
"""Test get_ray_image_for_python_version() warns for unsupported versions."""
167+
warn_mock = mocker.patch("warnings.warn")
168+
169+
# Test with unsupported version and warn_on_unsupported=True (default)
170+
image = get_ray_image_for_python_version("3.9", warn_on_unsupported=True)
171+
172+
# Should have warned
173+
warn_mock.assert_called_once()
174+
assert "No default Ray image defined for 3.9" in warn_mock.call_args[0][0]
175+
176+
# Should return None
177+
assert image is None
178+
179+
180+
def test_get_ray_image_for_python_version_unsupported_without_warning():
181+
"""Test get_ray_image_for_python_version() falls back to 3.12 without warning."""
182+
# Test with unsupported version and warn_on_unsupported=False
183+
image = get_ray_image_for_python_version("3.10", warn_on_unsupported=False)
184+
185+
# Should fall back to Python 3.12 image
186+
assert image == CUDA_PY312_RUNTIME_IMAGE
187+
188+
189+
def test_get_ray_image_for_python_version_unsupported_silent_fallback():
190+
"""Test get_ray_image_for_python_version() silently falls back for old versions."""
191+
# Test with Python 3.8 and warn_on_unsupported=False
192+
image = get_ray_image_for_python_version("3.8", warn_on_unsupported=False)
193+
194+
# Should fall back to Python 3.12 image without warning
195+
assert image == CUDA_PY312_RUNTIME_IMAGE
196+
197+
198+
def test_get_ray_image_for_python_version_none_defaults_to_current(mocker):
199+
"""Test that passing None to get_ray_image_for_python_version() uses current Python."""
200+
# Mock sys.version_info to simulate Python 3.11
201+
VersionInfo = namedtuple(
202+
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
203+
)
204+
mocker.patch("sys.version_info", VersionInfo(3, 11, 5, "final", 0))
205+
206+
# Passing None should detect the mocked version
207+
image = get_ray_image_for_python_version(None, warn_on_unsupported=True)
208+
209+
assert image == CUDA_PY311_RUNTIME_IMAGE

src/codeflare_sdk/ray/rayjobs/config.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -448,70 +448,70 @@ def _build_env_vars(self) -> list:
448448
"""Build environment variables list."""
449449
return [V1EnvVar(name=key, value=value) for key, value in self.envs.items()]
450450

451-
def add_script_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH):
451+
def add_file_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH):
452452
"""
453-
Add script volume and mount references to cluster configuration.
453+
Add file volume and mount references to cluster configuration.
454454
455455
Args:
456-
configmap_name: Name of the ConfigMap containing scripts
457-
mount_path: Where to mount scripts in containers (default: /home/ray/scripts)
456+
configmap_name: Name of the ConfigMap containing files
457+
mount_path: Where to mount files in containers (default: /home/ray/scripts)
458458
"""
459-
# Check if script volume already exists
460-
volume_name = "ray-job-scripts"
459+
# Check if file volume already exists
460+
volume_name = "ray-job-files"
461461
existing_volume = next(
462462
(v for v in self.volumes if getattr(v, "name", None) == volume_name), None
463463
)
464464
if existing_volume:
465-
logger.debug(f"Script volume '{volume_name}' already exists, skipping...")
465+
logger.debug(f"File volume '{volume_name}' already exists, skipping...")
466466
return
467467

468-
# Check if script mount already exists
468+
# Check if file mount already exists
469469
existing_mount = next(
470470
(m for m in self.volume_mounts if getattr(m, "name", None) == volume_name),
471471
None,
472472
)
473473
if existing_mount:
474474
logger.debug(
475-
f"Script volume mount '{volume_name}' already exists, skipping..."
475+
f"File volume mount '{volume_name}' already exists, skipping..."
476476
)
477477
return
478478

479-
# Add script volume to cluster configuration
480-
script_volume = V1Volume(
479+
# Add file volume to cluster configuration
480+
file_volume = V1Volume(
481481
name=volume_name, config_map=V1ConfigMapVolumeSource(name=configmap_name)
482482
)
483-
self.volumes.append(script_volume)
483+
self.volumes.append(file_volume)
484484

485-
# Add script volume mount to cluster configuration
486-
script_mount = V1VolumeMount(name=volume_name, mount_path=mount_path)
487-
self.volume_mounts.append(script_mount)
485+
# Add file volume mount to cluster configuration
486+
file_mount = V1VolumeMount(name=volume_name, mount_path=mount_path)
487+
self.volume_mounts.append(file_mount)
488488

489489
logger.info(
490-
f"Added script volume '{configmap_name}' to cluster config: mount_path={mount_path}"
490+
f"Added file volume '{configmap_name}' to cluster config: mount_path={mount_path}"
491491
)
492492

493-
def validate_configmap_size(self, scripts: Dict[str, str]) -> None:
494-
total_size = sum(len(content.encode("utf-8")) for content in scripts.values())
493+
def validate_configmap_size(self, files: Dict[str, str]) -> None:
494+
total_size = sum(len(content.encode("utf-8")) for content in files.values())
495495
if total_size > 1024 * 1024: # 1MB
496496
raise ValueError(
497497
f"ConfigMap size exceeds 1MB limit. Total size: {total_size} bytes"
498498
)
499499

500-
def build_script_configmap_spec(
501-
self, job_name: str, namespace: str, scripts: Dict[str, str]
500+
def build_file_configmap_spec(
501+
self, job_name: str, namespace: str, files: Dict[str, str]
502502
) -> Dict[str, Any]:
503503
"""
504-
Build ConfigMap specification for scripts
504+
Build ConfigMap specification for files
505505
506506
Args:
507507
job_name: Name of the RayJob (used for ConfigMap naming)
508508
namespace: Kubernetes namespace
509-
scripts: Dictionary of script_name -> script_content
509+
files: Dictionary of file_name -> file_content
510510
511511
Returns:
512512
Dict: ConfigMap specification ready for Kubernetes API
513513
"""
514-
configmap_name = f"{job_name}-scripts"
514+
configmap_name = f"{job_name}-files"
515515
return {
516516
"apiVersion": "v1",
517517
"kind": "ConfigMap",
@@ -521,27 +521,27 @@ def build_script_configmap_spec(
521521
"labels": {
522522
"ray.io/job-name": job_name,
523523
"app.kubernetes.io/managed-by": "codeflare-sdk",
524-
"app.kubernetes.io/component": "rayjob-scripts",
524+
"app.kubernetes.io/component": "rayjob-files",
525525
},
526526
},
527-
"data": scripts,
527+
"data": files,
528528
}
529529

530-
def build_script_volume_specs(
530+
def build_file_volume_specs(
531531
self, configmap_name: str, mount_path: str = MOUNT_PATH
532532
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
533533
"""
534-
Build volume and mount specifications for scripts
534+
Build volume and mount specifications for files
535535
536536
Args:
537-
configmap_name: Name of the ConfigMap containing scripts
538-
mount_path: Where to mount scripts in containers
537+
configmap_name: Name of the ConfigMap containing files
538+
mount_path: Where to mount files in containers
539539
540540
Returns:
541541
Tuple of (volume_spec, mount_spec) as dictionaries
542542
"""
543-
volume_spec = {"name": "ray-job-scripts", "configMap": {"name": configmap_name}}
543+
volume_spec = {"name": "ray-job-files", "configMap": {"name": configmap_name}}
544544

545-
mount_spec = {"name": "ray-job-scripts", "mountPath": mount_path}
545+
mount_spec = {"name": "ray-job-files", "mountPath": mount_path}
546546

547547
return volume_spec, mount_spec

0 commit comments

Comments
 (0)