Skip to content

Commit 5777e7c

Browse files
authored
Merge branch 'main' into export-D84192560
2 parents f7df09b + 79e14da commit 5777e7c

File tree

12 files changed

+76
-19
lines changed

12 files changed

+76
-19
lines changed

.github/workflows/doc-build.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,6 @@ jobs:
8585
set -eux
8686
sudo apt-get install -y pandoc
8787
pip install -r docs/requirements.txt
88-
- name: Start Airflow
89-
run: |
90-
# start airflow in background
91-
airflow standalone &
92-
# wait 5 seconds for airflow to start
93-
sleep 5
9488
- name: Build
9589
run: |
9690
set -ex

docs/requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,3 @@ jupytext
1010
ipython_genutils
1111
# https://github.com/jupyter/nbconvert/issues/1736
1212
jinja2>=3.1.4
13-
apache-airflow==2.10.5

docs/source/advanced.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,33 @@ resource can then be used in the following manner:
153153

154154
test_app("gpu_x2")
155155

156+
Alternatively, you can define custom named resources in a Python module and point
157+
to it using the ``TORCHX_CUSTOM_NAMED_RESOURCES`` environment variable:
158+
159+
.. code-block:: python
160+
161+
# my_resources.py
162+
from torchx.specs import Resource
163+
164+
def gpu_x8_efa() -> Resource:
165+
return Resource(cpu=100, gpu=8, memMB=819200, devices={"vpc.amazonaws.com/efa": 1})
166+
167+
def cpu_x32() -> Resource:
168+
return Resource(cpu=32, gpu=0, memMB=131072)
169+
170+
NAMED_RESOURCES = {
171+
"gpu_x8_efa": gpu_x8_efa,
172+
"cpu_x32": cpu_x32,
173+
}
174+
175+
Then set the environment variable:
176+
177+
.. code-block:: bash
178+
179+
export TORCHX_CUSTOM_NAMED_RESOURCES=my_resources
180+
181+
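This allows you to use your custom resources without creating a package with entry points. The value must be the import path of a module that is importable in the environment where TorchX runs (for example, a module on ``PYTHONPATH``) and that defines a module-level ``NAMED_RESOURCES`` mapping of resource names to factory functions.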
182+
156183

157184
Registering Custom Components
158185
-------------------------------

torchx/cli/cmd_run.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from torchx.runner import config, get_runner, Runner
2727
from torchx.runner.config import load_sections
2828
from torchx.schedulers import get_default_scheduler_name, get_scheduler_factories
29-
from torchx.specs import CfgVal
29+
from torchx.specs import CfgVal, Workspace
3030
from torchx.specs.finder import (
3131
_Component,
3232
ComponentNotFoundException,
@@ -36,7 +36,6 @@
3636
)
3737
from torchx.util.log_tee_helpers import tee_logs
3838
from torchx.util.types import none_throws
39-
from torchx.workspace import Workspace
4039

4140

4241
MISSING_COMPONENT_ERROR_MSG = (

torchx/runner/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env python3
21
# Copyright (c) Meta Platforms, Inc. and affiliates.
32
# All rights reserved.
43
#
@@ -43,6 +42,7 @@
4342
parse_app_handle,
4443
runopts,
4544
UnknownAppException,
45+
Workspace,
4646
)
4747
from torchx.specs.finder import get_component
4848
from torchx.tracker.api import (
@@ -54,7 +54,7 @@
5454
from torchx.util.session import get_session_id_or_create_new, TORCHX_INTERNAL_SESSION_ID
5555

5656
from torchx.util.types import none_throws
57-
from torchx.workspace.api import Workspace, WorkspaceMixin
57+
from torchx.workspace import WorkspaceMixin
5858

5959
if TYPE_CHECKING:
6060
from typing_extensions import Self

torchx/runner/test/config_test.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@
2525
)
2626
from torchx.schedulers import get_scheduler_factories, Scheduler
2727
from torchx.schedulers.api import DescribeAppResponse, ListAppResponse, Stream
28-
from torchx.specs import AppDef, AppDryRunInfo, CfgVal, runopts
28+
from torchx.specs import AppDef, AppDryRunInfo, CfgVal, runopts, Workspace
2929
from torchx.test.fixtures import TestWithTmpDir
30-
from torchx.workspace import Workspace
3130

3231

3332
class TestScheduler(Scheduler):

torchx/schedulers/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env python3
21
# Copyright (c) Meta Platforms, Inc. and affiliates.
32
# All rights reserved.
43
#
@@ -22,8 +21,9 @@
2221
Role,
2322
RoleStatus,
2423
runopts,
24+
Workspace,
2525
)
26-
from torchx.workspace.api import Workspace, WorkspaceMixin
26+
from torchx.workspace import WorkspaceMixin
2727

2828

2929
DAYS_IN_2_WEEKS = 14

torchx/specs/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
scheduler or pipeline adapter.
1313
"""
1414
import difflib
15+
16+
import os
1517
from typing import Callable, Dict, Mapping, Optional
1618

1719
from torchx.specs.api import (
@@ -64,8 +66,10 @@
6466
GENERIC_NAMED_RESOURCES: Mapping[str, ResourceFactory] = import_attr(
6567
"torchx.specs.named_resources_generic", "NAMED_RESOURCES", default={}
6668
)
67-
FB_NAMED_RESOURCES: Mapping[str, ResourceFactory] = import_attr(
68-
"torchx.specs.fb.named_resources", "NAMED_RESOURCES", default={}
69+
CUSTOM_NAMED_RESOURCES: Mapping[str, ResourceFactory] = import_attr(
70+
os.environ.get("TORCHX_CUSTOM_NAMED_RESOURCES", "torchx.specs.fb.named_resources"),
71+
"NAMED_RESOURCES",
72+
default={},
6973
)
7074

7175

@@ -76,7 +80,7 @@ def _load_named_resources() -> Dict[str, Callable[[], Resource]]:
7680
for name, resource in {
7781
**GENERIC_NAMED_RESOURCES,
7882
**AWS_NAMED_RESOURCES,
79-
**FB_NAMED_RESOURCES,
83+
**CUSTOM_NAMED_RESOURCES,
8084
**resource_methods,
8185
}.items():
8286
materialized_resources[name] = resource

torchx/specs/named_resources_aws.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,16 @@ def aws_p5_48xlarge() -> Resource:
120120
)
121121

122122

123+
def aws_p5e_48xlarge() -> Resource:
124+
return Resource(
125+
cpu=192,
126+
gpu=8,
127+
memMB=2048 * GiB,
128+
capabilities={K8S_ITYPE: "p5e.48xlarge"},
129+
devices={EFA_DEVICE: 32},
130+
)
131+
132+
123133
def aws_p5en_48xlarge() -> Resource:
124134
return Resource(
125135
cpu=192,
@@ -419,6 +429,7 @@ def aws_inf2_48xlarge() -> Resource:
419429
"aws_p4d.24xlarge": aws_p4d_24xlarge,
420430
"aws_p4de.24xlarge": aws_p4de_24xlarge,
421431
"aws_p5.48xlarge": aws_p5_48xlarge,
432+
"aws_p5e.48xlarge": aws_p5e_48xlarge,
422433
"aws_p5en.48xlarge": aws_p5en_48xlarge,
423434
"aws_g4dn.xlarge": aws_g4dn_xlarge,
424435
"aws_g4dn.2xlarge": aws_g4dn_2xlarge,

torchx/specs/test/named_resources_aws_test.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
aws_p4d_24xlarge,
4545
aws_p4de_24xlarge,
4646
aws_p5_48xlarge,
47+
aws_p5e_48xlarge,
4748
aws_p5en_48xlarge,
4849
aws_t3_medium,
4950
aws_trn1_2xlarge,
@@ -95,13 +96,19 @@ def test_aws_p4(self) -> None:
9596

9697
def test_aws_p5(self) -> None:
9798
p5 = aws_p5_48xlarge()
99+
p5e = aws_p5e_48xlarge()
98100
p5en = aws_p5en_48xlarge()
99101

100102
self.assertEqual(192, p5.cpu)
101103
self.assertEqual(8, p5.gpu)
102104
self.assertEqual(2048 * GiB, p5.memMB)
103105
self.assertEqual({EFA_DEVICE: 32}, p5.devices)
104106

107+
self.assertEqual(192, p5e.cpu)
108+
self.assertEqual(8, p5e.gpu)
109+
self.assertEqual(2048 * GiB, p5e.memMB)
110+
self.assertEqual({EFA_DEVICE: 32}, p5e.devices)
111+
105112
self.assertEqual(192, p5en.cpu)
106113
self.assertEqual(8, p5en.gpu)
107114
self.assertEqual(2048 * GiB, p5en.memMB)

0 commit comments

Comments
 (0)