Skip to content

Commit 86a1cd7

Browse files
committed
Resolve RayJob E2E test failures and add entrypoint validation
1 parent a348747 commit 86a1cd7

File tree

3 files changed

+263
-120
lines changed

3 files changed

+263
-120
lines changed

src/codeflare_sdk/ray/rayjobs/rayjob.py

Lines changed: 71 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -457,81 +457,93 @@ def _validate_cluster_config_image(self):
457457

458458
def _validate_working_dir_entrypoint(self):
459459
"""
460-
Validate that entrypoint doesn't redundantly reference the working_dir.
460+
Validate entrypoint file configuration.
461461
462-
This prevents a common mistake where users specify both working_dir and
463-
reference the same directory in their entrypoint, causing the job to fail.
462+
Checks:
463+
1. Entrypoint doesn't redundantly reference working_dir
464+
2. Local files exist before submission
464465
465-
Example problem:
466-
working_dir = "./mydir"
467-
entrypoint = "python ./mydir/script.py" # Wrong! Will look for ./mydir/mydir/script.py
468-
469-
Should be:
470-
entrypoint = "python script.py" # Correct! Runs from within working_dir
471-
472-
Raises:
473-
ValueError: If a redundant directory reference is detected
466+
Raises ValueError if validation fails.
474467
"""
475-
# Get working_dir from runtime_env if it exists
476-
if not self.runtime_env:
468+
# Skip validation for inline commands (python -c, etc.)
469+
if re.search(r"\s+-c\s+", self.entrypoint):
477470
return
478471

479-
runtime_env_dict = (
480-
self.runtime_env.to_dict()
481-
if hasattr(self.runtime_env, "to_dict")
482-
else self.runtime_env
483-
)
484-
if not runtime_env_dict or "working_dir" not in runtime_env_dict:
472+
# Match Python file references only
473+
file_pattern = r"(?:python\d?\s+)?([./\w/-]+\.py)"
474+
matches = re.findall(file_pattern, self.entrypoint)
475+
476+
if not matches:
485477
return
486478

487-
working_dir = runtime_env_dict["working_dir"]
479+
entrypoint_path = matches[0]
488480

489-
# Only validate local working_dir (not remote URLs)
490-
if not os.path.isdir(working_dir):
491-
return
481+
# Get working_dir from runtime_env
482+
runtime_env_dict = None
483+
working_dir = None
492484

493-
# Extract Python file path from entrypoint using the same pattern as runtime_env.py
494-
# Pattern matches: test.py, ./test.py, dir/test.py, my-dir/test.py
495-
python_file_pattern = r"(?:python\s+)?([./\w/-]+\.py)"
496-
matches = re.findall(python_file_pattern, self.entrypoint)
485+
if self.runtime_env:
486+
runtime_env_dict = (
487+
self.runtime_env.to_dict()
488+
if hasattr(self.runtime_env, "to_dict")
489+
else self.runtime_env
490+
)
491+
if runtime_env_dict and "working_dir" in runtime_env_dict:
492+
working_dir = runtime_env_dict["working_dir"]
497493

498-
if not matches:
499-
return # No Python file found in entrypoint
494+
# Skip all validation for remote working_dir
495+
if working_dir and not os.path.isdir(working_dir):
496+
return
500497

501-
entrypoint_path = matches[0] # Get first Python file reference
498+
# Case 1: Local working_dir - check redundancy and file existence
499+
if working_dir:
500+
normalized_working_dir = os.path.normpath(working_dir)
501+
normalized_entrypoint = os.path.normpath(entrypoint_path)
502502

503-
# Normalize paths for comparison (remove ./, trailing /, etc.)
504-
normalized_working_dir = os.path.normpath(working_dir)
505-
normalized_entrypoint = os.path.normpath(entrypoint_path)
503+
# Check for redundant directory reference
504+
if normalized_entrypoint.startswith(normalized_working_dir + os.sep):
505+
relative_to_working_dir = os.path.relpath(
506+
normalized_entrypoint, normalized_working_dir
507+
)
508+
working_dir_basename = os.path.basename(normalized_working_dir)
509+
redundant_nested_path = os.path.join(
510+
normalized_working_dir,
511+
working_dir_basename,
512+
relative_to_working_dir,
513+
)
506514

507-
# Check if entrypoint path starts with working_dir path
508-
# This indicates potential redundant directory reference
509-
if normalized_entrypoint.startswith(normalized_working_dir + os.sep):
510-
# Extract the path that would be searched for (the redundant nested path)
511-
relative_to_working_dir = os.path.relpath(
512-
normalized_entrypoint, normalized_working_dir
513-
)
514-
# The redundant path would be: working_dir / basename(working_dir) / relative_path
515-
working_dir_basename = os.path.basename(normalized_working_dir)
516-
redundant_nested_path = os.path.join(
517-
normalized_working_dir, working_dir_basename, relative_to_working_dir
518-
)
515+
if not os.path.exists(redundant_nested_path):
516+
raise ValueError(
517+
f"❌ Working directory conflict detected:\n"
518+
f" working_dir: '{working_dir}'\n"
519+
f" entrypoint references: '{entrypoint_path}'\n"
520+
f"\n"
521+
f"This will fail because the entrypoint runs from within working_dir.\n"
522+
f"It would look for: '{redundant_nested_path}' (which doesn't exist)\n"
523+
f"\n"
524+
f"Fix: Remove the directory prefix from your entrypoint:\n"
525+
f' entrypoint = "python {relative_to_working_dir}"'
526+
)
519527

520-
# Check if the redundant nested path actually exists on disk
521-
# If it doesn't exist, this is likely a user error
522-
# If it does exist, it's a legitimate nested directory structure
523-
if not os.path.exists(redundant_nested_path):
524-
# This is a user error - block it with helpful message
528+
# Check file exists within working_dir
529+
if not normalized_entrypoint.startswith(normalized_working_dir + os.sep):
530+
full_entrypoint_path = os.path.join(working_dir, entrypoint_path)
531+
if not os.path.isfile(full_entrypoint_path):
532+
raise ValueError(
533+
f"❌ Entrypoint file not found:\n"
534+
f" Looking for: '{full_entrypoint_path}'\n"
535+
f" (working_dir: '{working_dir}', entrypoint file: '{entrypoint_path}')\n"
536+
f"\n"
537+
f"Please ensure the file exists at the expected location."
538+
)
539+
540+
# Case 2: No working_dir - validate local file exists
541+
else:
542+
if not os.path.isfile(entrypoint_path):
525543
raise ValueError(
526-
f"❌ Working directory conflict detected:\n"
527-
f" working_dir: '{working_dir}'\n"
528-
f" entrypoint references: '{entrypoint_path}'\n"
529-
f"\n"
530-
f"This will fail because the entrypoint runs from within working_dir.\n"
531-
f"It would look for: '{redundant_nested_path}' (which doesn't exist)\n"
544+
f"❌ Entrypoint file not found: '{entrypoint_path}'\n"
532545
f"\n"
533-
f"Fix: Remove the directory prefix from your entrypoint:\n"
534-
f' entrypoint = "python {relative_to_working_dir}"'
546+
f"Please ensure the file exists at the specified path."
535547
)
536548

537549
def status(

0 commit comments

Comments
 (0)