Skip to content

Commit 12856f7

Browse files
authored
Move to MCAD v1.34.1 (#770)
* Update API version
* docs: add version requirement to docs
* test: update tests to mcad v1.34.1

---------

Co-authored-by: Sara Kokkila Schumacher <[email protected]>
1 parent 1f5eac8 commit 12856f7

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

torchx/schedulers/kubernetes_mcad_scheduler.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
See deploying Multi-Cluster-Application-Dispatcher guide
2020
https://github.com/project-codeflare/multi-cluster-app-dispatcher/blob/main/doc/deploy/deployment.md
2121
22+
This implementation requires MCAD v1.34.1 or higher.
23+
2224
TorchX uses `torch.distributed.run <https://pytorch.org/docs/stable/elastic/run.html>`_ to run distributed training.
2325
2426
Learn more about running distributed trainers :py:mod:`torchx.components.dist`
@@ -369,7 +371,7 @@ def create_pod_group(
369371
pod_group_name = app_id + "-pg" + str(role_idx)
370372

371373
labels = object_labels(app, app_id)
372-
labels.update({"appwrapper.mcad.ibm.com": app_id})
374+
labels.update({"appwrapper.workload.codeflare.dev": app_id})
373375

374376
pod_group: Dict[str, Any] = {
375377
"apiVersion": "scheduling.sigs.k8s.io/v1alpha1",
@@ -434,7 +436,7 @@ def mcad_svc(
434436
target_port=int(service_port),
435437
)
436438
],
437-
selector={"appwrapper.mcad.ibm.com": svc_name},
439+
selector={"appwrapper.workload.codeflare.dev": svc_name},
438440
session_affinity="None",
439441
type="ClusterIP",
440442
),
@@ -596,7 +598,7 @@ def app_to_resource(
596598

597599
"""
598600
Create Service:
599-
The selector will have the key 'appwrapper.mcad.ibm.com', and the value will be
601+
The selector will have the key 'appwrapper.workload.codeflare.dev', and the value will be
600602
the appwrapper name
601603
"""
602604

@@ -627,7 +629,7 @@ def app_to_resource(
627629
enable_retry(job_spec, appwrapper_retries, total_pods)
628630

629631
resource: Dict[str, object] = {
630-
"apiVersion": "mcad.ibm.com/v1beta1",
632+
"apiVersion": "workload.codeflare.dev/v1beta1",
631633
"kind": "AppWrapper",
632634
"metadata": {"name": unique_app_id, "namespace": namespace},
633635
"spec": job_spec,
@@ -803,7 +805,7 @@ class KubernetesMCADScheduler(DockerWorkspaceMixin, Scheduler[KubernetesMCADOpts
803805
co-scheduler.
804806
For installation instructions see: https://github.com/project-codeflare/multi-cluster-app-dispatcher/blob/main/doc/deploy/deployment.md
805807
806-
This has been confirmed to work with MCAD main branch and OpenShift Kubernetes
808+
This has been confirmed to work with MCAD main branch v1.34.1 or higher and OpenShift Kubernetes
807809
Client Version: 4.10.13
808810
Server Version: 4.9.18
809811
Kubernetes Version: v1.22.3+e790d7f
@@ -947,7 +949,7 @@ def schedule(self, dryrun_info: AppDryRunInfo[KubernetesMCADJob]) -> str:
947949

948950
try:
949951
resp = self._custom_objects_api().create_namespaced_custom_object(
950-
group="mcad.ibm.com",
952+
group="workload.codeflare.dev",
951953
version="v1beta1",
952954
namespace=namespace,
953955
plural="appwrappers",
@@ -1035,7 +1037,7 @@ def _validate(self, app: AppDef, scheduler: str) -> None:
10351037
def _cancel_existing(self, app_id: str) -> None:
10361038
namespace, name = app_id.split(":")
10371039
self._custom_objects_api().delete_namespaced_custom_object(
1038-
group="mcad.ibm.com",
1040+
group="workload.codeflare.dev",
10391041
version="v1beta1",
10401042
namespace=namespace,
10411043
plural="appwrappers",
@@ -1096,7 +1098,7 @@ def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
10961098

10971099
# Production section
10981100
api_instance = self._custom_objects_api
1099-
group = "mcad.ibm.com"
1101+
group = "workload.codeflare.dev"
11001102
version = "v1beta1"
11011103
plural = "appwrappers"
11021104
try:
@@ -1214,7 +1216,7 @@ def list(self) -> List[ListAppResponse]:
12141216
namespace = active_context["context"]["namespace"]
12151217

12161218
resp = self._custom_objects_api().list_namespaced_custom_object(
1217-
group="mcad.ibm.com",
1219+
group="workload.codeflare.dev",
12181220
version="v1beta1",
12191221
namespace=namespace,
12201222
plural="appwrappers",

torchx/schedulers/test/kubernetes_mcad_scheduler_test.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ def test_create_pod_group(self) -> None:
381381
"app.kubernetes.io/name": "test",
382382
"app.kubernetes.io/managed-by": "torchx.pytorch.org",
383383
"app.kubernetes.io/instance": "app-name",
384-
"appwrapper.mcad.ibm.com": unique_app_name,
384+
"appwrapper.workload.codeflare.dev": unique_app_name,
385385
},
386386
},
387387
"spec": {
@@ -446,7 +446,7 @@ def test_create_mcad_service(self) -> None:
446446
target_port=int(service_port),
447447
)
448448
],
449-
selector={"appwrapper.mcad.ibm.com": service_name},
449+
selector={"appwrapper.workload.codeflare.dev": service_name},
450450
session_affinity="None",
451451
type="ClusterIP",
452452
),
@@ -550,7 +550,7 @@ def test_submit_dryrun(self) -> None:
550550

551551
self.assertEqual(
552552
resource,
553-
f"""apiVersion: mcad.ibm.com/v1beta1
553+
f"""apiVersion: workload.codeflare.dev/v1beta1
554554
kind: AppWrapper
555555
metadata:
556556
name: app-name
@@ -567,7 +567,7 @@ def test_submit_dryrun(self) -> None:
567567
app.kubernetes.io/instance: app-name
568568
app.kubernetes.io/managed-by: torchx.pytorch.org
569569
app.kubernetes.io/name: test
570-
appwrapper.mcad.ibm.com: app-name
570+
appwrapper.workload.codeflare.dev: app-name
571571
name: app-name-pg0
572572
namespace: test_namespace
573573
spec:
@@ -663,7 +663,7 @@ def test_submit_dryrun(self) -> None:
663663
targetPort: 1234
664664
publishNotReadyAddresses: true
665665
selector:
666-
appwrapper.mcad.ibm.com: app-name
666+
appwrapper.workload.codeflare.dev: app-name
667667
sessionAffinity: None
668668
type: ClusterIP
669669
status:
@@ -1607,7 +1607,7 @@ def test_submit(self, create_namespaced_custom_object: MagicMock) -> None:
16071607
self.assertEqual(id, "testnamespace:testid")
16081608
call = create_namespaced_custom_object.call_args
16091609
args, kwargs = call
1610-
self.assertEqual(kwargs["group"], "mcad.ibm.com")
1610+
self.assertEqual(kwargs["group"], "workload.codeflare.dev")
16111611
self.assertEqual(kwargs["version"], "v1beta1")
16121612
self.assertEqual(kwargs["namespace"], "testnamespace")
16131613
self.assertEqual(kwargs["plural"], "appwrappers")
@@ -1665,7 +1665,7 @@ def test_describe(self, get_namespaced_custom_object: MagicMock) -> None:
16651665
call = get_namespaced_custom_object.call_args
16661666
args, kwargs = call
16671667

1668-
assert "mcad.ibm.com" in args
1668+
assert "workload.codeflare.dev" in args
16691669
assert "v1beta1" in args
16701670
assert "appwrappers" in args
16711671
assert "foo" in args
@@ -1767,7 +1767,7 @@ def test_describe_unknown(self, get_namespaced_custom_object: MagicMock) -> None
17671767
call = get_namespaced_custom_object.call_args
17681768
args, kwargs = call
17691769

1770-
assert "mcad.ibm.com" in args
1770+
assert "workload.codeflare.dev" in args
17711771
assert "v1beta1" in args
17721772
assert "appwrappers" in args
17731773
assert "foo" in args
@@ -1844,7 +1844,7 @@ def test_cancel_existing(self, delete_namespaced_custom_object: MagicMock) -> No
18441844
self.assertEqual(
18451845
kwargs,
18461846
{
1847-
"group": "mcad.ibm.com",
1847+
"group": "workload.codeflare.dev",
18481848
"version": "v1beta1",
18491849
"namespace": "testnamespace",
18501850
"plural": "appwrappers",
@@ -1866,7 +1866,7 @@ def test_list(self, list_namespaced_custom_object: MagicMock) -> None:
18661866
self.assertEqual(
18671867
kwargs,
18681868
{
1869-
"group": "mcad.ibm.com",
1869+
"group": "workload.codeflare.dev",
18701870
"version": "v1beta1",
18711871
"namespace": "default",
18721872
"plural": "appwrappers",
@@ -1877,12 +1877,12 @@ def test_list(self, list_namespaced_custom_object: MagicMock) -> None:
18771877
@patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object")
18781878
def test_list_values(self, list_namespaced_custom_object: MagicMock) -> None:
18791879
list_namespaced_custom_object.return_value = {
1880-
"apiVersion": "mcad.ibm.com/v1beta1",
1880+
"apiVersion": "workload.codeflare.dev/v1beta1",
18811881
"name": "test-training",
18821882
"namespace": "default",
18831883
"items": [
18841884
{
1885-
"apiVersion": "mcad.ibm.com/v1beta1",
1885+
"apiVersion": "workload.codeflare.dev/v1beta1",
18861886
"kind": "AppWrapper",
18871887
"metadata": {
18881888
"name": "test-training",
@@ -1935,7 +1935,7 @@ def test_list_values(self, list_namespaced_custom_object: MagicMock) -> None:
19351935
},
19361936
},
19371937
{
1938-
"apiVersion": "mcad.ibm.com/v1beta1",
1938+
"apiVersion": "workload.codeflare.dev/v1beta1",
19391939
"kind": "AppWrapper",
19401940
"metadata": {
19411941
"name": "test-training",

0 commit comments

Comments
 (0)