Skip to content

Commit 05323d9

Browse files
committed
Trying to bump coverage
Signed-off-by: Pat O'Connor <[email protected]>
1 parent e096a1d commit 05323d9

File tree

1 file changed

+368
-0
lines changed

1 file changed

+368
-0
lines changed

src/codeflare_sdk/ray/cluster/test_cluster.py

Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,374 @@ def test_run_job_with_managed_cluster_validation_error(mocker):
975975
assert "entrypoint must be specified" in str(e)
976976

977977

978+
def test_run_job_with_managed_cluster_failed_job(mocker):
979+
"""Test RayJob execution when job fails."""
980+
from codeflare_sdk.ray.job.job import RayJobSpec
981+
982+
# Mock dependencies
983+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
984+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
985+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
986+
987+
mock_api_client = mocker.Mock()
988+
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", return_value=mock_api_client)
989+
990+
mock_co_api = mocker.Mock()
991+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_co_api)
992+
993+
# Mock Cluster creation
994+
mock_cluster_resource = {"apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": {}}
995+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster.__init__", return_value=None)
996+
mock_cluster_instance = mocker.Mock()
997+
mock_cluster_instance.resource_yaml = mock_cluster_resource
998+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster", return_value=mock_cluster_instance)
999+
1000+
# Mock RayJob creation
1001+
mock_co_api.create_namespaced_custom_object.return_value = {"metadata": {"name": "test-failed-job"}}
1002+
1003+
# Mock job status as FAILED
1004+
mock_status_response = {
1005+
"status": {
1006+
"jobDeploymentStatus": "Running",
1007+
"jobStatus": "FAILED",
1008+
"jobId": "failed-job-123",
1009+
"rayClusterName": "test-cluster",
1010+
"message": "Job failed due to error"
1011+
}
1012+
}
1013+
mock_co_api.get_namespaced_custom_object_status.return_value = mock_status_response
1014+
mocker.patch("time.sleep")
1015+
1016+
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test-ns")
1017+
job_config = RayJobSpec(entrypoint="python failing_script.py")
1018+
1019+
result = Cluster.run_job_with_managed_cluster(
1020+
cluster_config=cluster_config,
1021+
job_config=job_config,
1022+
wait_for_completion=True
1023+
)
1024+
1025+
assert result["job_status"] == "FAILED"
1026+
assert result["job_submission_id"] == "failed-job-123"
1027+
1028+
1029+
def test_run_job_with_managed_cluster_stopped_job(mocker):
1030+
"""Test RayJob execution when job is stopped."""
1031+
from codeflare_sdk.ray.job.job import RayJobSpec
1032+
1033+
# Mock dependencies
1034+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
1035+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
1036+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
1037+
1038+
mock_api_client = mocker.Mock()
1039+
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", return_value=mock_api_client)
1040+
1041+
mock_co_api = mocker.Mock()
1042+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_co_api)
1043+
1044+
# Mock Cluster creation
1045+
mock_cluster_resource = {"apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": {}}
1046+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster.__init__", return_value=None)
1047+
mock_cluster_instance = mocker.Mock()
1048+
mock_cluster_instance.resource_yaml = mock_cluster_resource
1049+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster", return_value=mock_cluster_instance)
1050+
1051+
# Mock RayJob creation
1052+
mock_co_api.create_namespaced_custom_object.return_value = {"metadata": {"name": "test-stopped-job"}}
1053+
1054+
# Mock job status as STOPPED
1055+
mock_status_response = {
1056+
"status": {
1057+
"jobDeploymentStatus": "Running",
1058+
"jobStatus": "STOPPED",
1059+
"jobId": "stopped-job-123"
1060+
}
1061+
}
1062+
mock_co_api.get_namespaced_custom_object_status.return_value = mock_status_response
1063+
mocker.patch("time.sleep")
1064+
1065+
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test-ns")
1066+
job_config = RayJobSpec(entrypoint="python script.py")
1067+
1068+
result = Cluster.run_job_with_managed_cluster(
1069+
cluster_config=cluster_config,
1070+
job_config=job_config,
1071+
wait_for_completion=True
1072+
)
1073+
1074+
assert result["job_status"] == "STOPPED"
1075+
assert result["job_submission_id"] == "stopped-job-123"
1076+
1077+
1078+
def test_run_job_with_managed_cluster_pending_job(mocker):
1079+
"""Test RayJob execution when job is pending then succeeds."""
1080+
from codeflare_sdk.ray.job.job import RayJobSpec
1081+
1082+
# Mock dependencies
1083+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
1084+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
1085+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
1086+
1087+
mock_api_client = mocker.Mock()
1088+
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", return_value=mock_api_client)
1089+
1090+
mock_co_api = mocker.Mock()
1091+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_co_api)
1092+
1093+
# Mock Cluster creation
1094+
mock_cluster_resource = {"apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": {}}
1095+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster.__init__", return_value=None)
1096+
mock_cluster_instance = mocker.Mock()
1097+
mock_cluster_instance.resource_yaml = mock_cluster_resource
1098+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster", return_value=mock_cluster_instance)
1099+
1100+
# Mock RayJob creation
1101+
mock_co_api.create_namespaced_custom_object.return_value = {"metadata": {"name": "test-pending-job"}}
1102+
1103+
# Mock job status progression: PENDING -> RUNNING -> SUCCEEDED
1104+
pending_response = {
1105+
"status": {
1106+
"jobDeploymentStatus": "Initializing",
1107+
"jobStatus": "PENDING",
1108+
"jobId": "pending-job-123"
1109+
}
1110+
}
1111+
running_response = {
1112+
"status": {
1113+
"jobDeploymentStatus": "Running",
1114+
"jobStatus": "RUNNING",
1115+
"jobId": "pending-job-123"
1116+
}
1117+
}
1118+
succeeded_response = {
1119+
"status": {
1120+
"jobDeploymentStatus": "Running",
1121+
"jobStatus": "SUCCEEDED",
1122+
"jobId": "pending-job-123"
1123+
}
1124+
}
1125+
1126+
mock_co_api.get_namespaced_custom_object_status.side_effect = [
1127+
pending_response, running_response, succeeded_response
1128+
]
1129+
mocker.patch("time.sleep")
1130+
1131+
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test-ns")
1132+
job_config = RayJobSpec(entrypoint="python script.py")
1133+
1134+
result = Cluster.run_job_with_managed_cluster(
1135+
cluster_config=cluster_config,
1136+
job_config=job_config,
1137+
wait_for_completion=True
1138+
)
1139+
1140+
assert result["job_status"] == "SUCCEEDED"
1141+
assert result["job_submission_id"] == "pending-job-123"
1142+
1143+
1144+
def test_run_job_with_managed_cluster_api_exception(mocker):
1145+
"""Test RayJob creation with API exception."""
1146+
from codeflare_sdk.ray.job.job import RayJobSpec
1147+
from kubernetes.client.rest import ApiException
1148+
1149+
# Mock dependencies
1150+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
1151+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
1152+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
1153+
1154+
mock_api_client = mocker.Mock()
1155+
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", return_value=mock_api_client)
1156+
1157+
mock_co_api = mocker.Mock()
1158+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_co_api)
1159+
1160+
# Mock Cluster creation
1161+
mock_cluster_resource = {"apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": {}}
1162+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster.__init__", return_value=None)
1163+
mock_cluster_instance = mocker.Mock()
1164+
mock_cluster_instance.resource_yaml = mock_cluster_resource
1165+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster", return_value=mock_cluster_instance)
1166+
1167+
# Mock API exception during job creation
1168+
mock_co_api.create_namespaced_custom_object.side_effect = ApiException(
1169+
status=400, reason="Bad Request", body='{"message": "Invalid RayJob spec"}'
1170+
)
1171+
1172+
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test-ns")
1173+
job_config = RayJobSpec(entrypoint="python script.py")
1174+
1175+
try:
1176+
Cluster.run_job_with_managed_cluster(
1177+
cluster_config=cluster_config,
1178+
job_config=job_config
1179+
)
1180+
assert False, "Expected ApiException"
1181+
except ApiException as e:
1182+
assert e.status == 400
1183+
assert "Bad Request" in str(e)
1184+
1185+
1186+
def test_run_job_with_managed_cluster_missing_status_fields(mocker):
1187+
"""Test RayJob with missing status fields."""
1188+
from codeflare_sdk.ray.job.job import RayJobSpec
1189+
1190+
# Mock dependencies
1191+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
1192+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
1193+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
1194+
1195+
mock_api_client = mocker.Mock()
1196+
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", return_value=mock_api_client)
1197+
1198+
mock_co_api = mocker.Mock()
1199+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_co_api)
1200+
1201+
# Mock Cluster creation
1202+
mock_cluster_resource = {"apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": {}}
1203+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster.__init__", return_value=None)
1204+
mock_cluster_instance = mocker.Mock()
1205+
mock_cluster_instance.resource_yaml = mock_cluster_resource
1206+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster", return_value=mock_cluster_instance)
1207+
1208+
# Mock RayJob creation
1209+
mock_co_api.create_namespaced_custom_object.return_value = {"metadata": {"name": "test-missing-fields"}}
1210+
1211+
# Mock job status with missing fields
1212+
mock_status_response = {
1213+
"status": {
1214+
"jobDeploymentStatus": "Running"
1215+
# Missing jobStatus, jobId, etc.
1216+
}
1217+
}
1218+
mock_co_api.get_namespaced_custom_object_status.return_value = mock_status_response
1219+
mocker.patch("time.sleep")
1220+
1221+
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test-ns")
1222+
job_config = RayJobSpec(entrypoint="python script.py")
1223+
1224+
result = Cluster.run_job_with_managed_cluster(
1225+
cluster_config=cluster_config,
1226+
job_config=job_config,
1227+
wait_for_completion=True,
1228+
job_timeout_seconds=5,
1229+
job_polling_interval_seconds=1
1230+
)
1231+
1232+
# Should handle missing fields gracefully
1233+
assert "job_cr_name" in result
1234+
assert result.get("job_status") is None or result.get("job_status") == "UNKNOWN"
1235+
1236+
1237+
def test_run_job_with_managed_cluster_custom_job_name(mocker):
1238+
"""Test RayJob with custom job CR name."""
1239+
from codeflare_sdk.ray.job.job import RayJobSpec
1240+
1241+
# Mock dependencies
1242+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
1243+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
1244+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
1245+
1246+
mock_api_client = mocker.Mock()
1247+
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", return_value=mock_api_client)
1248+
1249+
mock_co_api = mocker.Mock()
1250+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_co_api)
1251+
1252+
# Mock Cluster creation
1253+
mock_cluster_resource = {"apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": {}}
1254+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster.__init__", return_value=None)
1255+
mock_cluster_instance = mocker.Mock()
1256+
mock_cluster_instance.resource_yaml = mock_cluster_resource
1257+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster", return_value=mock_cluster_instance)
1258+
1259+
# Mock successful RayJob creation with custom name
1260+
custom_job_name = "my-custom-rayjob-name"
1261+
mock_co_api.create_namespaced_custom_object.return_value = {"metadata": {"name": custom_job_name}}
1262+
1263+
# Mock job completion
1264+
mock_status_response = {
1265+
"status": {
1266+
"jobDeploymentStatus": "Running",
1267+
"jobStatus": "SUCCEEDED",
1268+
"jobId": "custom-job-123"
1269+
}
1270+
}
1271+
mock_co_api.get_namespaced_custom_object_status.return_value = mock_status_response
1272+
mocker.patch("time.sleep")
1273+
1274+
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test-ns")
1275+
job_config = RayJobSpec(entrypoint="python script.py")
1276+
1277+
result = Cluster.run_job_with_managed_cluster(
1278+
cluster_config=cluster_config,
1279+
job_config=job_config,
1280+
job_cr_name=custom_job_name,
1281+
wait_for_completion=True
1282+
)
1283+
1284+
assert result["job_cr_name"] == custom_job_name
1285+
assert result["job_submission_id"] == "custom-job-123"
1286+
assert result["job_status"] == "SUCCEEDED"
1287+
1288+
1289+
def test_run_job_with_managed_cluster_all_status_fields(mocker):
1290+
"""Test RayJob with all possible status fields populated."""
1291+
from codeflare_sdk.ray.job.job import RayJobSpec
1292+
1293+
# Mock dependencies
1294+
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
1295+
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
1296+
mocker.patch("codeflare_sdk.ray.cluster.cluster.config_check")
1297+
1298+
mock_api_client = mocker.Mock()
1299+
mocker.patch("codeflare_sdk.common.kubernetes_cluster.auth.get_api_client", return_value=mock_api_client)
1300+
1301+
mock_co_api = mocker.Mock()
1302+
mocker.patch("kubernetes.client.CustomObjectsApi", return_value=mock_co_api)
1303+
1304+
# Mock Cluster creation
1305+
mock_cluster_resource = {"apiVersion": "ray.io/v1", "kind": "RayCluster", "spec": {}}
1306+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster.__init__", return_value=None)
1307+
mock_cluster_instance = mocker.Mock()
1308+
mock_cluster_instance.resource_yaml = mock_cluster_resource
1309+
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster", return_value=mock_cluster_instance)
1310+
1311+
# Mock RayJob creation
1312+
mock_co_api.create_namespaced_custom_object.return_value = {"metadata": {"name": "test-all-fields"}}
1313+
1314+
# Mock job status with all fields
1315+
mock_status_response = {
1316+
"status": {
1317+
"jobDeploymentStatus": "Complete",
1318+
"jobStatus": "SUCCEEDED",
1319+
"dashboardURL": "http://ray-dashboard:8265",
1320+
"rayClusterName": "test-cluster-name",
1321+
"jobId": "all-fields-job-456",
1322+
"message": "Job completed successfully",
1323+
"startTime": "2023-01-01T10:00:00Z",
1324+
"endTime": "2023-01-01T10:05:00Z"
1325+
}
1326+
}
1327+
mock_co_api.get_namespaced_custom_object_status.return_value = mock_status_response
1328+
mocker.patch("time.sleep")
1329+
1330+
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test-ns")
1331+
job_config = RayJobSpec(entrypoint="python comprehensive_script.py")
1332+
1333+
result = Cluster.run_job_with_managed_cluster(
1334+
cluster_config=cluster_config,
1335+
job_config=job_config,
1336+
wait_for_completion=True
1337+
)
1338+
1339+
# Verify all fields are captured
1340+
assert result["job_status"] == "SUCCEEDED"
1341+
assert result["dashboard_url"] == "http://ray-dashboard:8265"
1342+
assert result["ray_cluster_name"] == "test-cluster-name"
1343+
assert result["job_submission_id"] == "all-fields-job-456"
1344+
1345+
9781346
# Make sure to always keep this function last
9791347
def test_cleanup():
9801348
os.remove(f"{aw_dir}test-all-params.yaml")

0 commit comments

Comments
 (0)