Skip to content

Commit d5989af

Browse files
committed
fix kueue management checking
1 parent 1cc2f41 commit d5989af

File tree

5 files changed

+156
-156
lines changed

5 files changed

+156
-156
lines changed

batchtools/bd.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,26 @@
11
import sys
22
import argparse
33
from typing import cast
4-
54
import openshift_client as oc
6-
75
from .basecommand import Command, override
86
from .basecommand import SubParserFactory
9-
from .helpers import oc_delete
7+
from .helpers import oc_delete, is_kueue_managed_job
108

119

1210
class DeleteJobsCommand(Command):
1311
"""
1412
batchtools bd [job-name [job-name ...]]
1513
16-
Delete specified Jobs, or all Jobs if none are specified.
14+
Delete specified Kueue-managed GPU jobs, or all such jobs if none are specified.
15+
16+
Description:
17+
Deletes only those Jobs that are both:
18+
- named like your GPU jobs (name starts with 'job-'), and
19+
- detected as Kueue-managed (via labels/Workload linkage).
1720
"""
1821

1922
name: str = "bd"
20-
help: str = "Delete specified Jobs, or all if none are specified"
23+
help: str = "Delete specified Kueue-managed GPU jobs, or all if none are specified"
2124

2225
@classmethod
2326
@override
@@ -26,34 +29,41 @@ def build_parser(cls, subparsers: SubParserFactory):
2629
p.add_argument(
2730
"job_names",
2831
nargs="*",
29-
help="Optional list of job names to delete",
32+
help="Optional list of job names to delete (must be Kueue-managed)",
3033
)
3134
return p
3235

3336
@staticmethod
3437
@override
3538
def run(args: argparse.Namespace):
3639
args = cast(DeleteJobsCommand, args)
37-
3840
try:
3941
jobs = oc.selector("jobs").objects()
4042
if not jobs:
4143
print("No jobs found.")
4244
return
4345

46+
gpu_jobs = [job for job in jobs]
47+
48+
# only want to delete kueue jobs so filter for kueue jobs
49+
kueue_gpu_jobs = [job for job in gpu_jobs if is_kueue_managed_job(job)]
50+
51+
if not kueue_gpu_jobs:
52+
print("No Kueue-managed GPU jobs to delete.")
53+
return
54+
4455
if args.job_names:
45-
# delete only specified jobs
46-
existing = {job.model.metadata.name for job in jobs}
56+
# if jobs are specified, only delete specified jobs
57+
allowed = {job.model.metadata.name for job in kueue_gpu_jobs}
4758
for name in args.job_names:
48-
if name not in existing:
49-
print(f"{name} does not exist; skipping.")
59+
if name not in allowed:
60+
print(f"{name} is not a Kueue-managed GPU job; skipping.")
5061
continue
5162
oc_delete("job", name)
5263
print(f"Deleted job: {name}")
5364
else:
54-
# delete all jobs
55-
print("No job names provided -> deleting ALL jobs:\n")
56-
for job in jobs:
65+
print("No job names provided -> deleting all Kueue-managed GPU jobs:\n")
66+
for job in kueue_gpu_jobs:
5767
name = job.model.metadata.name
5868
oc_delete("job", name)
5969
print(f"Deleted job: {name}")

batchtools/bj.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import sys
55
import openshift_client as oc
66

7+
from .helpers import is_kueue_managed_job
78
from .basecommand import Command
89

910

@@ -35,8 +36,12 @@ def run(args: argparse.Namespace):
3536
print("No jobs found.")
3637
return
3738

38-
print(f"Found {len(jobs)} job(s):\n")
39-
for job in jobs:
39+
# filter only Kueue-managed jobs
40+
managed = [job for job in jobs if is_kueue_managed_job(job)]
41+
42+
print(f"Found {len(managed)} job(s):\n")
43+
44+
for job in managed:
4045
print(f"- {job.model.metadata.name}")
4146

4247
except oc.OpenShiftPythonException as e:

batchtools/helpers.py

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -29,40 +29,11 @@ def oc_delete(obj_type: str, obj_name: str) -> None:
2929
print(f"Error occurred while deleting {obj_type}/{obj_name}: {e}")
3030

3131

32-
# def is_kueue_managed_job(job_obj) -> bool:
33-
# """
34-
# Returns True if the Job is managed by Kueue.
35-
# Checks:
36-
# 1) Job has label 'kueue.x-k8s.io/queue-name'
37-
# 2) A Workload exists that either:
38-
# - has an ownerReference pointing to this Job, or
39-
# - has label job-name=<job-name>
40-
# """
41-
# try:
42-
# md = job_obj.model.metadata
43-
# labels = getattr(md, "labels", {}) or {}
44-
# if "kueue.x-k8s.io/queue-name" in labels:
45-
# return True
46-
47-
# job_name = md.name
48-
# try:
49-
# workloads = oc.selector("workloads").objects()
50-
# except oc.OpenShiftPythonException:
51-
# workloads = []
52-
53-
# for wl in workloads:
54-
# wl_md = wl.model.metadata
55-
# owners = getattr(wl_md, "ownerReferences", []) or []
56-
# for o in owners:
57-
# if (
58-
# getattr(o, "kind", "") == "Job"
59-
# and getattr(o, "name", "") == job_name
60-
# ):
61-
# return True
62-
# wl_labels = getattr(wl_md, "labels", {}) or {}
63-
# if wl_labels.get("job-name") == job_name:
64-
# return True
65-
# except Exception:
66-
# return False
67-
68-
# return False
32+
def is_kueue_managed_job(job_obj) -> bool:
33+
try:
34+
md = job_obj.model.metadata
35+
labels = getattr(md, "labels", {}) or {}
36+
if "kueue.x-k8s.io/queue-name" in labels:
37+
return True
38+
except Exception:
39+
return False

tests/test_bd.py

Lines changed: 70 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,18 @@
99

1010

1111
@pytest.fixture
12-
def jobs():
12+
def kueue_jobs():
1313
return [
14-
DictToObject({"model": {"metadata": {"name": "job-1"}}}),
15-
DictToObject({"model": {"metadata": {"name": "job-2"}}}),
16-
DictToObject({"model": {"metadata": {"name": "other-1"}}}),
14+
DictToObject({"model": {"metadata": {"name": "job-job-1"}}}),
15+
DictToObject({"model": {"metadata": {"name": "job-job-2"}}}),
16+
]
17+
18+
19+
@pytest.fixture
20+
def mixed_jobs():
21+
return [
22+
DictToObject({"model": {"metadata": {"name": "job-job-1"}}}),
23+
DictToObject({"model": {"metadata": {"name": "ignored-1"}}}),
1724
]
1825

1926

@@ -36,85 +43,107 @@ def patch_selector_with(job_list):
3643
yield mock_selector
3744

3845

46+
def patch_kueue_managed(*names_that_are_kueue):
47+
"""
48+
Patch batchtools.bd.is_kueue_managed_job so ONLY the provided job names return True.
49+
50+
bd.py calls is_kueue_managed_job with a STRING job name, not an APIObject.
51+
This predicate accepts either a str or an object and normalizes to the name.
52+
"""
53+
54+
def _predicate(arg):
55+
if isinstance(arg, str):
56+
name = arg
57+
else:
58+
# tolerate APIObject/DictToObject
59+
name = getattr(
60+
getattr(getattr(arg, "model", None), "metadata", None), "name", None
61+
) or getattr(arg, "name", None)
62+
return name in names_that_are_kueue
63+
64+
return mock.patch("batchtools.bd.is_kueue_managed_job", side_effect=_predicate)
65+
66+
3967
def test_no_jobs_found(args, capsys):
4068
with patch_selector_with([]):
4169
DeleteJobsCommand.run(args)
4270
out = capsys.readouterr().out
4371
assert "No jobs found." in out
4472

4573

46-
def test_delete_all_when_no_names_given(args, jobs, capsys):
74+
def test_no_kueue_managed_gpu_jobs(args, kueue_jobs, capsys):
75+
with patch_selector_with(kueue_jobs), patch_kueue_managed():
76+
DeleteJobsCommand.run(args)
77+
out = capsys.readouterr().out
78+
assert "No Kueue-managed GPU jobs to delete." in out
79+
80+
81+
def test_ignores_non_gpu_named_jobs(args, mixed_jobs, capsys):
82+
with patch_selector_with(mixed_jobs), patch_kueue_managed("job-job-1"):
83+
DeleteJobsCommand.run(args)
84+
out = capsys.readouterr().out
85+
# delete only job-job-1
86+
assert "Deleting job/job-job-1" in out
87+
assert "Deleted job: job-job-1" in out
88+
assert "ignored-1" not in out
89+
90+
91+
def test_delete_all_when_no_names_given(args, kueue_jobs, capsys):
4792
args.job_names = [] # explicit
48-
with patch_selector_with(jobs):
93+
with patch_selector_with(kueue_jobs), patch_kueue_managed("job-job-1", "job-job-2"):
4994
DeleteJobsCommand.run(args)
5095
out = capsys.readouterr().out
5196

52-
assert "No job names provided -> deleting ALL jobs:" in out
53-
# oc_delete should print "Deleting job/<name>" and "Deleted job: <name>"
54-
for obj in jobs:
97+
assert "No job names provided -> deleting all Kueue-managed GPU jobs:" in out
98+
for obj in kueue_jobs:
5599
name = obj.model.metadata.name
56100
assert f"Deleting job/{name}" in out
57101
assert f"Deleted job: {name}" in out
58102

59103

60-
def test_delete_only_specified_existing(args, jobs, capsys):
61-
args.job_names = ["job-1", "job-2"]
62-
with patch_selector_with(jobs):
104+
def test_delete_only_specified_allowed(args, kueue_jobs, capsys):
105+
args.job_names = ["job-job-1", "job-job-2"]
106+
with patch_selector_with(kueue_jobs), patch_kueue_managed("job-job-1"):
63107
DeleteJobsCommand.run(args)
64108
out = capsys.readouterr().out
65109

66-
# Only the two specified jobs should be deleted
67-
assert "Deleting job/job-1" in out
68-
assert "Deleted job: job-1" in out
69-
assert "Deleting job/job-2" in out
70-
assert "Deleted job: job-2" in out
71-
72-
# other-1 exists but is not listed; should not be deleted
73-
assert "Deleting job/other-1" not in out
110+
assert "Deleting job/job-job-1" in out
111+
assert "Deleted job: job-job-1" in out
112+
assert "job-job-2 is not a Kueue-managed GPU job; skipping." in out
113+
assert "Deleting job/job-job-2" not in out
74114

75115

76-
def test_skips_nonexistent_names(args, jobs, capsys):
77-
args.job_names = ["job-1", "does-not-exist"]
78-
with patch_selector_with(jobs):
116+
def test_only_deletes_listed_names_even_if_more_kueue(args, kueue_jobs, capsys):
117+
args.job_names = ["job-job-2"]
118+
with patch_selector_with(kueue_jobs), patch_kueue_managed("job-job-1", "job-job-2"):
79119
DeleteJobsCommand.run(args)
80120
out = capsys.readouterr().out
81121

82-
# Existing job gets deleted
83-
assert "Deleting job/job-1" in out
84-
assert "Deleted job: job-1" in out
85-
86-
# Missing job is skipped with a message
87-
assert "does-not-exist does not exist; skipping." in out
88-
assert "Deleting job/does-not-exist" not in out
122+
assert "Deleting job/job-job-2" in out
123+
assert "Deleted job: job-job-2" in out
124+
# make sure job-job-1 is not deleted implicitly
125+
assert "Deleting job/job-job-1" not in out
89126

90127

91128
def test_delete_jobs_prints_error_when_delete_raises(args, capsys):
92-
# We rely on oc_delete's behavior here: it should catch the exception and
93-
# print "Error occurred while deleting job/<name>: <msg>"
94-
jobs = [DictToObject({"model": {"metadata": {"name": "job-1"}}})]
95-
96-
with patch_selector_with(jobs) as mock_selector:
129+
jobs = [DictToObject({"model": {"metadata": {"name": "job-job-1"}}})]
130+
with patch_selector_with(jobs) as mock_selector, patch_kueue_managed("job-job-1"):
97131
mock_selector.return_value.delete.side_effect = OpenShiftPythonException(
98132
"test exception"
99133
)
100134

101135
DeleteJobsCommand.run(args)
102136
out = capsys.readouterr().out
103-
104-
# From oc_delete
105-
assert "Deleting job/job-1" in out
106-
assert "Error occurred while deleting job/job-1: test exception" in out
137+
assert "Error occurred while deleting job/job-job-1: test exception" in out
107138

108139

109140
def test_sys_exit_when_list_selector_raises(args):
110-
# If listing jobs fails, DeleteJobsCommand should exit with an error
111141
with mock.patch(
112142
"openshift_client.selector",
113143
side_effect=OpenShiftPythonException("not successful"),
114144
):
115145
with pytest.raises(SystemExit) as excinfo:
116146
DeleteJobsCommand.run(args)
117-
118147
assert "Error occurred while deleting jobs: not successful" in str(
119148
excinfo.value
120149
)

0 commit comments

Comments
 (0)