Skip to content

Commit a348747

Browse files
committed
add validation for nested directories and unit tests
1 parent b195de7 commit a348747

File tree

2 files changed

+336
-0
lines changed

2 files changed

+336
-0
lines changed

src/codeflare_sdk/ray/rayjobs/rayjob.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
"""
1818

1919
import logging
20+
import os
21+
import re
2022
import warnings
2123
from typing import Dict, Any, Optional, Tuple, Union
2224

@@ -160,7 +162,9 @@ def submit(self) -> str:
160162
if not self.entrypoint:
161163
raise ValueError("Entrypoint must be provided to submit a RayJob")
162164

165+
# Validate configuration before submitting
163166
self._validate_ray_version_compatibility()
167+
self._validate_working_dir_entrypoint()
164168

165169
# Extract files from entrypoint and runtime_env working_dir
166170
files = extract_all_local_files(self)
@@ -451,6 +455,85 @@ def _validate_cluster_config_image(self):
451455
elif is_warning:
452456
warnings.warn(f"Cluster config image: {message}")
453457

458+
def _validate_working_dir_entrypoint(self):
459+
"""
460+
Validate that entrypoint doesn't redundantly reference the working_dir.
461+
462+
This prevents a common mistake where users specify both working_dir and
463+
reference the same directory in their entrypoint, causing the job to fail.
464+
465+
Example problem:
466+
working_dir = "./mydir"
467+
entrypoint = "python ./mydir/script.py" # Wrong! Will look for ./mydir/mydir/script.py
468+
469+
Should be:
470+
entrypoint = "python script.py" # Correct! Runs from within working_dir
471+
472+
Raises:
473+
ValueError: If a redundant directory reference is detected
474+
"""
475+
# Get working_dir from runtime_env if it exists
476+
if not self.runtime_env:
477+
return
478+
479+
runtime_env_dict = (
480+
self.runtime_env.to_dict()
481+
if hasattr(self.runtime_env, "to_dict")
482+
else self.runtime_env
483+
)
484+
if not runtime_env_dict or "working_dir" not in runtime_env_dict:
485+
return
486+
487+
working_dir = runtime_env_dict["working_dir"]
488+
489+
# Only validate local working_dir (not remote URLs)
490+
if not os.path.isdir(working_dir):
491+
return
492+
493+
# Extract Python file path from entrypoint using the same pattern as runtime_env.py
494+
# Pattern matches: test.py, ./test.py, dir/test.py, my-dir/test.py
495+
python_file_pattern = r"(?:python\s+)?([./\w/-]+\.py)"
496+
matches = re.findall(python_file_pattern, self.entrypoint)
497+
498+
if not matches:
499+
return # No Python file found in entrypoint
500+
501+
entrypoint_path = matches[0] # Get first Python file reference
502+
503+
# Normalize paths for comparison (remove ./, trailing /, etc.)
504+
normalized_working_dir = os.path.normpath(working_dir)
505+
normalized_entrypoint = os.path.normpath(entrypoint_path)
506+
507+
# Check if entrypoint path starts with working_dir path
508+
# This indicates potential redundant directory reference
509+
if normalized_entrypoint.startswith(normalized_working_dir + os.sep):
510+
# Extract the path that would be searched for (the redundant nested path)
511+
relative_to_working_dir = os.path.relpath(
512+
normalized_entrypoint, normalized_working_dir
513+
)
514+
# The redundant path would be: working_dir / basename(working_dir) / relative_path
515+
working_dir_basename = os.path.basename(normalized_working_dir)
516+
redundant_nested_path = os.path.join(
517+
normalized_working_dir, working_dir_basename, relative_to_working_dir
518+
)
519+
520+
# Check if the redundant nested path actually exists on disk
521+
# If it doesn't exist, this is likely a user error
522+
# If it does exist, it's a legitimate nested directory structure
523+
if not os.path.exists(redundant_nested_path):
524+
# This is a user error - block it with helpful message
525+
raise ValueError(
526+
f"❌ Working directory conflict detected:\n"
527+
f" working_dir: '{working_dir}'\n"
528+
f" entrypoint references: '{entrypoint_path}'\n"
529+
f"\n"
530+
f"This will fail because the entrypoint runs from within working_dir.\n"
531+
f"It would look for: '{redundant_nested_path}' (which doesn't exist)\n"
532+
f"\n"
533+
f"Fix: Remove the directory prefix from your entrypoint:\n"
534+
f' entrypoint = "python {relative_to_working_dir}"'
535+
)
536+
454537
def status(
455538
self, print_to_console: bool = True
456539
) -> Tuple[CodeflareRayJobStatus, bool]:

src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,3 +1401,256 @@ def test_build_submitter_pod_template_with_files(auto_mock_setup):
14011401
# Verify paths match keys
14021402
for item in config_map_items:
14031403
assert item["key"] == item["path"]
1404+
1405+
1406+
def test_validate_working_dir_entrypoint_no_runtime_env(auto_mock_setup):
1407+
"""
1408+
Test validation passes when no runtime_env is specified.
1409+
"""
1410+
rayjob = RayJob(
1411+
job_name="test-job",
1412+
cluster_name="test-cluster",
1413+
entrypoint="python script.py",
1414+
namespace="test-namespace",
1415+
)
1416+
1417+
# Should not raise any exception
1418+
rayjob._validate_working_dir_entrypoint()
1419+
1420+
1421+
def test_validate_working_dir_entrypoint_no_working_dir(auto_mock_setup):
1422+
"""
1423+
Test validation passes when runtime_env has no working_dir.
1424+
"""
1425+
rayjob = RayJob(
1426+
job_name="test-job",
1427+
cluster_name="test-cluster",
1428+
entrypoint="python script.py",
1429+
namespace="test-namespace",
1430+
runtime_env=RuntimeEnv(pip=["numpy"]),
1431+
)
1432+
1433+
# Should not raise any exception
1434+
rayjob._validate_working_dir_entrypoint()
1435+
1436+
1437+
def test_validate_working_dir_entrypoint_remote_working_dir(auto_mock_setup):
1438+
"""
1439+
Test validation skips remote working_dir (URL).
1440+
"""
1441+
rayjob = RayJob(
1442+
job_name="test-job",
1443+
cluster_name="test-cluster",
1444+
entrypoint="python script.py",
1445+
namespace="test-namespace",
1446+
runtime_env=RuntimeEnv(
1447+
working_dir="https://github.com/user/repo/archive/main.zip"
1448+
),
1449+
)
1450+
1451+
# Should not raise any exception (remote URL skipped)
1452+
rayjob._validate_working_dir_entrypoint()
1453+
1454+
1455+
def test_validate_working_dir_entrypoint_no_python_file(auto_mock_setup):
1456+
"""
1457+
Test validation passes when entrypoint has no Python file reference.
1458+
"""
1459+
rayjob = RayJob(
1460+
job_name="test-job",
1461+
cluster_name="test-cluster",
1462+
entrypoint="echo 'hello world'", # No Python file
1463+
namespace="test-namespace",
1464+
runtime_env=RuntimeEnv(working_dir="."),
1465+
)
1466+
1467+
# Should not raise any exception
1468+
rayjob._validate_working_dir_entrypoint()
1469+
1470+
1471+
def test_validate_working_dir_entrypoint_no_redundancy(auto_mock_setup, tmp_path):
1472+
"""
1473+
Test validation passes when entrypoint doesn't reference working_dir.
1474+
"""
1475+
# Create test directory and file
1476+
test_dir = tmp_path / "testdir"
1477+
test_dir.mkdir()
1478+
script_file = test_dir / "script.py"
1479+
script_file.write_text("print('hello')")
1480+
1481+
rayjob = RayJob(
1482+
job_name="test-job",
1483+
cluster_name="test-cluster",
1484+
entrypoint="python script.py", # No directory prefix
1485+
namespace="test-namespace",
1486+
runtime_env=RuntimeEnv(working_dir=str(test_dir)),
1487+
)
1488+
1489+
# Should not raise any exception
1490+
rayjob._validate_working_dir_entrypoint()
1491+
1492+
1493+
def test_validate_working_dir_entrypoint_redundant_reference_error(
1494+
auto_mock_setup, tmp_path
1495+
):
1496+
"""
1497+
Test validation raises error when entrypoint redundantly references working_dir.
1498+
"""
1499+
# Create test directory and file
1500+
test_dir = tmp_path / "testdir"
1501+
test_dir.mkdir()
1502+
script_file = test_dir / "script.py"
1503+
script_file.write_text("print('hello')")
1504+
1505+
rayjob = RayJob(
1506+
job_name="test-job",
1507+
cluster_name="test-cluster",
1508+
entrypoint=f"python {test_dir}/script.py", # Redundant reference
1509+
namespace="test-namespace",
1510+
runtime_env=RuntimeEnv(working_dir=str(test_dir)),
1511+
)
1512+
1513+
# Should raise ValueError with helpful message
1514+
with pytest.raises(ValueError, match="Working directory conflict detected"):
1515+
rayjob._validate_working_dir_entrypoint()
1516+
1517+
1518+
def test_validate_working_dir_entrypoint_with_dot_slash(auto_mock_setup, tmp_path):
1519+
"""
1520+
Test validation handles paths with ./ prefix correctly.
1521+
"""
1522+
# Create test directory and file
1523+
test_dir = tmp_path / "testdir"
1524+
test_dir.mkdir()
1525+
script_file = test_dir / "script.py"
1526+
script_file.write_text("print('hello')")
1527+
1528+
# Change to temp directory so relative paths work
1529+
import os
1530+
1531+
original_cwd = os.getcwd()
1532+
try:
1533+
os.chdir(tmp_path)
1534+
1535+
rayjob = RayJob(
1536+
job_name="test-job",
1537+
cluster_name="test-cluster",
1538+
entrypoint="python ./testdir/script.py", # With ./ prefix
1539+
namespace="test-namespace",
1540+
runtime_env=RuntimeEnv(working_dir="./testdir"), # With ./ prefix
1541+
)
1542+
1543+
# Should raise ValueError (redundant reference)
1544+
with pytest.raises(ValueError, match="Working directory conflict detected"):
1545+
rayjob._validate_working_dir_entrypoint()
1546+
finally:
1547+
os.chdir(original_cwd)
1548+
1549+
1550+
def test_validate_working_dir_entrypoint_submit_integration(auto_mock_setup, tmp_path):
1551+
"""
1552+
Test that validation is called during submit() and blocks submission.
1553+
"""
1554+
mock_api_instance = auto_mock_setup["rayjob_api"]
1555+
mock_api_instance.submit_job.return_value = {"metadata": {"name": "test-job"}}
1556+
1557+
# Create test directory and file
1558+
test_dir = tmp_path / "testdir"
1559+
test_dir.mkdir()
1560+
script_file = test_dir / "script.py"
1561+
script_file.write_text("print('hello')")
1562+
1563+
rayjob = RayJob(
1564+
job_name="test-job",
1565+
cluster_name="test-cluster",
1566+
entrypoint=f"python {test_dir}/script.py", # Redundant reference
1567+
namespace="test-namespace",
1568+
runtime_env=RuntimeEnv(working_dir=str(test_dir)),
1569+
)
1570+
1571+
# Should raise ValueError during submit() before API call
1572+
with pytest.raises(ValueError, match="Working directory conflict detected"):
1573+
rayjob.submit()
1574+
1575+
# Verify submit_job was never called (validation blocked it)
1576+
mock_api_instance.submit_job.assert_not_called()
1577+
1578+
1579+
def test_validate_working_dir_entrypoint_error_message_format(
1580+
auto_mock_setup, tmp_path
1581+
):
1582+
"""
1583+
Test that error message contains helpful information.
1584+
"""
1585+
# Create test directory and file
1586+
test_dir = tmp_path / "testdir"
1587+
test_dir.mkdir()
1588+
script_file = test_dir / "script.py"
1589+
script_file.write_text("print('hello')")
1590+
1591+
rayjob = RayJob(
1592+
job_name="test-job",
1593+
cluster_name="test-cluster",
1594+
entrypoint=f"python {test_dir}/script.py",
1595+
namespace="test-namespace",
1596+
runtime_env=RuntimeEnv(working_dir=str(test_dir)),
1597+
)
1598+
1599+
try:
1600+
rayjob._validate_working_dir_entrypoint()
1601+
assert False, "Should have raised ValueError"
1602+
except ValueError as e:
1603+
error_msg = str(e)
1604+
# Verify error message contains key information
1605+
assert "Working directory conflict detected" in error_msg
1606+
assert "working_dir:" in error_msg
1607+
assert "entrypoint references:" in error_msg
1608+
assert "Fix: Remove the directory prefix" in error_msg
1609+
assert "python script.py" in error_msg # Suggested fix
1610+
1611+
1612+
def test_validate_working_dir_entrypoint_subdirectory_valid(auto_mock_setup, tmp_path):
1613+
"""
1614+
Test validation passes when entrypoint references subdirectory within working_dir.
1615+
"""
1616+
# Create test directory structure: testdir/subdir/script.py
1617+
test_dir = tmp_path / "testdir"
1618+
test_dir.mkdir()
1619+
sub_dir = test_dir / "subdir"
1620+
sub_dir.mkdir()
1621+
script_file = sub_dir / "script.py"
1622+
script_file.write_text("print('hello')")
1623+
1624+
rayjob = RayJob(
1625+
job_name="test-job",
1626+
cluster_name="test-cluster",
1627+
entrypoint="python subdir/script.py", # Correct: relative to working_dir
1628+
namespace="test-namespace",
1629+
runtime_env=RuntimeEnv(working_dir=str(test_dir)),
1630+
)
1631+
1632+
# Should not raise any exception
1633+
rayjob._validate_working_dir_entrypoint()
1634+
1635+
1636+
def test_validate_working_dir_entrypoint_runtime_env_as_dict(auto_mock_setup, tmp_path):
1637+
"""
1638+
Test validation works when runtime_env is passed as dict (not RuntimeEnv object).
1639+
"""
1640+
# Create test directory and file
1641+
test_dir = tmp_path / "testdir"
1642+
test_dir.mkdir()
1643+
script_file = test_dir / "script.py"
1644+
script_file.write_text("print('hello')")
1645+
1646+
rayjob = RayJob(
1647+
job_name="test-job",
1648+
cluster_name="test-cluster",
1649+
entrypoint=f"python {test_dir}/script.py", # Redundant reference
1650+
namespace="test-namespace",
1651+
runtime_env={"working_dir": str(test_dir)}, # Dict instead of RuntimeEnv
1652+
)
1653+
1654+
# Should raise ValueError even with dict runtime_env
1655+
with pytest.raises(ValueError, match="Working directory conflict detected"):
1656+
rayjob._validate_working_dir_entrypoint()

0 commit comments

Comments
 (0)