Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/rayjob_e2e_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ jobs:
kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user
kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward
kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods --user=sdk-user
kubectl create clusterrole configmap-manager --verb=get,list,create,delete,update,patch --resource=configmaps
kubectl create clusterrolebinding sdk-user-configmap-manager --clusterrole=configmap-manager --user=sdk-user
kubectl create clusterrole workload-reader --verb=get,list,watch --resource=workloads
kubectl create clusterrolebinding sdk-user-workload-reader --clusterrole=workload-reader --user=sdk-user
kubectl config use-context sdk-user

- name: Run RayJob E2E tests
Expand All @@ -126,7 +130,7 @@ jobs:
pip install poetry
poetry install --with test,docs
echo "Running RayJob e2e tests..."
poetry run pytest -v -s ./tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log 2>&1
poetry run pytest -v -s ./tests/e2e/rayjob/ > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output_rayjob.log 2>&1

- name: Switch to kind-cluster context to print logs
if: always() && steps.deploy.outcome == 'success'
Expand Down
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/codeflare_sdk/common/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
"3.11": CUDA_PY311_RUNTIME_IMAGE,
"3.12": CUDA_PY312_RUNTIME_IMAGE,
}
MOUNT_PATH = "/home/ray/scripts"
MOUNT_PATH = "/home/ray/files"
209 changes: 209 additions & 0 deletions src/codeflare_sdk/common/utils/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
# Copyright 2025 IBM, Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Tests for common/utils/utils.py
"""

import pytest
from collections import namedtuple
from codeflare_sdk.common.utils.utils import (
update_image,
get_ray_image_for_python_version,
)
from codeflare_sdk.common.utils.constants import (
SUPPORTED_PYTHON_VERSIONS,
CUDA_PY311_RUNTIME_IMAGE,
CUDA_PY312_RUNTIME_IMAGE,
)


def test_update_image_with_empty_string_python_311(mocker):
"""Test that update_image() with empty string returns default image for Python 3.11."""
# Mock sys.version_info to simulate Python 3.11
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0))

# Test with empty image (should use default for Python 3.11)
image = update_image("")
assert image == CUDA_PY311_RUNTIME_IMAGE
assert image == SUPPORTED_PYTHON_VERSIONS["3.11"]


def test_update_image_with_empty_string_python_312(mocker):
"""Test that update_image() with empty string returns default image for Python 3.12."""
# Mock sys.version_info to simulate Python 3.12
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0))

# Test with empty image (should use default for Python 3.12)
image = update_image("")
assert image == CUDA_PY312_RUNTIME_IMAGE
assert image == SUPPORTED_PYTHON_VERSIONS["3.12"]


def test_update_image_with_none_python_311(mocker):
"""Test that update_image() with None returns default image for Python 3.11."""
# Mock sys.version_info to simulate Python 3.11
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0))

# Test with None image (should use default for Python 3.11)
image = update_image(None)
assert image == CUDA_PY311_RUNTIME_IMAGE


def test_update_image_with_none_python_312(mocker):
"""Test that update_image() with None returns default image for Python 3.12."""
# Mock sys.version_info to simulate Python 3.12
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0))

# Test with None image (should use default for Python 3.12)
image = update_image(None)
assert image == CUDA_PY312_RUNTIME_IMAGE


def test_update_image_with_unsupported_python_version(mocker):
"""Test update_image() warning for unsupported Python versions."""
# Mock sys.version_info to simulate Python 3.8 (unsupported)
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 8, 0, "final", 0))

# Mock warnings.warn to check if it gets called
warn_mock = mocker.patch("warnings.warn")

# Call update_image with empty image
image = update_image("")

# Assert that the warning was called with the expected message
warn_mock.assert_called_once()
assert "No default Ray image defined for 3.8" in warn_mock.call_args[0][0]
assert "3.11, 3.12" in warn_mock.call_args[0][0]

# Assert that no image was set since the Python version is not supported
assert image is None


def test_update_image_with_provided_custom_image():
"""Test that providing a custom image bypasses auto-detection."""
custom_image = "my-custom-ray:latest"
image = update_image(custom_image)

# Should return the provided image unchanged
assert image == custom_image


def test_update_image_with_provided_image_empty_string():
"""Test update_image() with provided custom image as a non-empty string."""
custom_image = "docker.io/rayproject/ray:2.40.0"
image = update_image(custom_image)

# Should return the provided image unchanged
assert image == custom_image


def test_get_ray_image_for_python_version_explicit_311():
"""Test get_ray_image_for_python_version() with explicit Python 3.11."""
image = get_ray_image_for_python_version("3.11")
assert image == CUDA_PY311_RUNTIME_IMAGE


def test_get_ray_image_for_python_version_explicit_312():
"""Test get_ray_image_for_python_version() with explicit Python 3.12."""
image = get_ray_image_for_python_version("3.12")
assert image == CUDA_PY312_RUNTIME_IMAGE


def test_get_ray_image_for_python_version_auto_detect_311(mocker):
"""Test get_ray_image_for_python_version() auto-detects Python 3.11."""
# Mock sys.version_info to simulate Python 3.11
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 11, 0, "final", 0))

# Test with None (should auto-detect)
image = get_ray_image_for_python_version()
assert image == CUDA_PY311_RUNTIME_IMAGE


def test_get_ray_image_for_python_version_auto_detect_312(mocker):
"""Test get_ray_image_for_python_version() auto-detects Python 3.12."""
# Mock sys.version_info to simulate Python 3.12
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 12, 0, "final", 0))

# Test with None (should auto-detect)
image = get_ray_image_for_python_version()
assert image == CUDA_PY312_RUNTIME_IMAGE


def test_get_ray_image_for_python_version_unsupported_with_warning(mocker):
"""Test get_ray_image_for_python_version() warns for unsupported versions."""
warn_mock = mocker.patch("warnings.warn")

# Test with unsupported version and warn_on_unsupported=True (default)
image = get_ray_image_for_python_version("3.9", warn_on_unsupported=True)

# Should have warned
warn_mock.assert_called_once()
assert "No default Ray image defined for 3.9" in warn_mock.call_args[0][0]

# Should return None
assert image is None


def test_get_ray_image_for_python_version_unsupported_without_warning():
"""Test get_ray_image_for_python_version() falls back to 3.12 without warning."""
# Test with unsupported version and warn_on_unsupported=False
image = get_ray_image_for_python_version("3.10", warn_on_unsupported=False)

# Should fall back to Python 3.12 image
assert image == CUDA_PY312_RUNTIME_IMAGE


def test_get_ray_image_for_python_version_unsupported_silent_fallback():
"""Test get_ray_image_for_python_version() silently falls back for old versions."""
# Test with Python 3.8 and warn_on_unsupported=False
image = get_ray_image_for_python_version("3.8", warn_on_unsupported=False)

# Should fall back to Python 3.12 image without warning
assert image == CUDA_PY312_RUNTIME_IMAGE


def test_get_ray_image_for_python_version_none_defaults_to_current(mocker):
"""Test that passing None to get_ray_image_for_python_version() uses current Python."""
# Mock sys.version_info to simulate Python 3.11
VersionInfo = namedtuple(
"version_info", ["major", "minor", "micro", "releaselevel", "serial"]
)
mocker.patch("sys.version_info", VersionInfo(3, 11, 5, "final", 0))

# Passing None should detect the mocked version
image = get_ray_image_for_python_version(None, warn_on_unsupported=True)

assert image == CUDA_PY311_RUNTIME_IMAGE
63 changes: 32 additions & 31 deletions src/codeflare_sdk/ray/rayjobs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, Any]:
"""
ray_cluster_spec = {
"rayVersion": RAY_VERSION,
"enableInTreeAutoscaling": False, # Required for Kueue-managed jobs
"headGroupSpec": self._build_head_group_spec(),
"workerGroupSpecs": [self._build_worker_group_spec(cluster_name)],
}
Expand Down Expand Up @@ -447,70 +448,70 @@ def _build_env_vars(self) -> list:
"""Build environment variables list."""
return [V1EnvVar(name=key, value=value) for key, value in self.envs.items()]

def add_script_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH):
def add_file_volumes(self, configmap_name: str, mount_path: str = MOUNT_PATH):
"""
Add script volume and mount references to cluster configuration.
Add file volume and mount references to cluster configuration.

Args:
configmap_name: Name of the ConfigMap containing scripts
mount_path: Where to mount scripts in containers (default: /home/ray/scripts)
configmap_name: Name of the ConfigMap containing files
mount_path: Where to mount files in containers (default: /home/ray/scripts)
"""
# Check if script volume already exists
volume_name = "ray-job-scripts"
# Check if file volume already exists
volume_name = "ray-job-files"
existing_volume = next(
(v for v in self.volumes if getattr(v, "name", None) == volume_name), None
)
if existing_volume:
logger.debug(f"Script volume '{volume_name}' already exists, skipping...")
logger.debug(f"File volume '{volume_name}' already exists, skipping...")
return

# Check if script mount already exists
# Check if file mount already exists
existing_mount = next(
(m for m in self.volume_mounts if getattr(m, "name", None) == volume_name),
None,
)
if existing_mount:
logger.debug(
f"Script volume mount '{volume_name}' already exists, skipping..."
f"File volume mount '{volume_name}' already exists, skipping..."
)
return

# Add script volume to cluster configuration
script_volume = V1Volume(
# Add file volume to cluster configuration
file_volume = V1Volume(
name=volume_name, config_map=V1ConfigMapVolumeSource(name=configmap_name)
)
self.volumes.append(script_volume)
self.volumes.append(file_volume)

# Add script volume mount to cluster configuration
script_mount = V1VolumeMount(name=volume_name, mount_path=mount_path)
self.volume_mounts.append(script_mount)
# Add file volume mount to cluster configuration
file_mount = V1VolumeMount(name=volume_name, mount_path=mount_path)
self.volume_mounts.append(file_mount)

logger.info(
f"Added script volume '{configmap_name}' to cluster config: mount_path={mount_path}"
f"Added file volume '{configmap_name}' to cluster config: mount_path={mount_path}"
)

def validate_configmap_size(self, scripts: Dict[str, str]) -> None:
total_size = sum(len(content.encode("utf-8")) for content in scripts.values())
def validate_configmap_size(self, files: Dict[str, str]) -> None:
total_size = sum(len(content.encode("utf-8")) for content in files.values())
if total_size > 1024 * 1024: # 1MB
raise ValueError(
f"ConfigMap size exceeds 1MB limit. Total size: {total_size} bytes"
)

def build_script_configmap_spec(
self, job_name: str, namespace: str, scripts: Dict[str, str]
def build_file_configmap_spec(
self, job_name: str, namespace: str, files: Dict[str, str]
) -> Dict[str, Any]:
"""
Build ConfigMap specification for scripts
Build ConfigMap specification for files

Args:
job_name: Name of the RayJob (used for ConfigMap naming)
namespace: Kubernetes namespace
scripts: Dictionary of script_name -> script_content
files: Dictionary of file_name -> file_content

Returns:
Dict: ConfigMap specification ready for Kubernetes API
"""
configmap_name = f"{job_name}-scripts"
configmap_name = f"{job_name}-files"
return {
"apiVersion": "v1",
"kind": "ConfigMap",
Expand All @@ -520,27 +521,27 @@ def build_script_configmap_spec(
"labels": {
"ray.io/job-name": job_name,
"app.kubernetes.io/managed-by": "codeflare-sdk",
"app.kubernetes.io/component": "rayjob-scripts",
"app.kubernetes.io/component": "rayjob-files",
},
},
"data": scripts,
"data": files,
}

def build_script_volume_specs(
def build_file_volume_specs(
self, configmap_name: str, mount_path: str = MOUNT_PATH
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
"""
Build volume and mount specifications for scripts
Build volume and mount specifications for files

Args:
configmap_name: Name of the ConfigMap containing scripts
mount_path: Where to mount scripts in containers
configmap_name: Name of the ConfigMap containing files
mount_path: Where to mount files in containers

Returns:
Tuple of (volume_spec, mount_spec) as dictionaries
"""
volume_spec = {"name": "ray-job-scripts", "configMap": {"name": configmap_name}}
volume_spec = {"name": "ray-job-files", "configMap": {"name": configmap_name}}

mount_spec = {"name": "ray-job-scripts", "mountPath": mount_path}
mount_spec = {"name": "ray-job-files", "mountPath": mount_path}

return volume_spec, mount_spec
Loading