Skip to content

Commit ed150e2

Browse files
aivanoufacebook-github-bot
authored andcommitted
Remove builder methods from Role (#80)
Summary: Pull Request resolved: #80 Remove all builder methods from the ``Role`` class Reviewed By: kiukchung Differential Revision: D29168794 fbshipit-source-id: 286a72bc6bfb6df9046e015e5fa55a8d2313c7a8
1 parent 812f04a commit ed150e2

File tree

12 files changed

+293
-195
lines changed

12 files changed

+293
-195
lines changed

examples/apps/datapreproc/component.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def data_preproc(
5454
],
5555
env=env,
5656
resource=resource,
57-
).replicas(1)
57+
num_replicas=1,
58+
)
5859

59-
return specs.AppDef(name).of(ddp_role)
60+
return specs.AppDef(name, roles=[ddp_role])

torchx/cli/test/cmd_log_test.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from unittest.mock import MagicMock, patch
1212

1313
from torchx.cli.cmd_log import ENDC, GREEN, get_logs
14-
from torchx.specs.api import AppDef, Role, parse_app_handle
14+
from torchx.specs import AppDef, Role, parse_app_handle
1515

1616

1717
class SentinelError(Exception):
@@ -31,9 +31,12 @@ def __call__(self, name: Optional[str] = None) -> "MockRunner":
3131

3232
def describe(self, app_handle: str) -> AppDef:
3333
scheduler_backend, session_name, app_id = parse_app_handle(app_handle)
34-
return AppDef(name=app_id).of(
35-
Role(name="master", image="test_image").replicas(1),
36-
Role(name="trainer", image="test_image").replicas(3),
34+
return AppDef(
35+
name=app_id,
36+
roles=[
37+
Role(name="master", image="test_image", num_replicas=1),
38+
Role(name="trainer", image="test_image", num_replicas=3),
39+
],
3740
)
3841

3942
def log_lines(

torchx/components/base/roles.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -106,15 +106,16 @@ def create_torch_dist_role(
106106
entrypoint = os.path.join(macros.img_root, entrypoint)
107107

108108
args = [*torch_run_args, entrypoint, *args]
109-
return (
110-
Role(
111-
name,
112-
image=image,
113-
base_image=base_image,
114-
resource=resource,
115-
port_map=port_map,
116-
)
117-
.runs(entrypoint_override, *args, **env)
118-
.replicas(num_replicas)
119-
.with_retry_policy(retry_policy, max_retries)
109+
return Role(
110+
name,
111+
image=image,
112+
base_image=base_image,
113+
entrypoint=entrypoint_override,
114+
args=args,
115+
env=env,
116+
num_replicas=num_replicas,
117+
retry_policy=retry_policy,
118+
max_retries=max_retries,
119+
resource=resource,
120+
port_map=port_map,
120121
)

torchx/components/base/test/roles_test.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ def test_build_create_torch_dist_role(self) -> None:
3333
nnodes="2:4",
3434
max_restarts=3,
3535
no_python=True,
36-
).replicas(2)
36+
num_replicas=2,
37+
)
3738
self.assertEqual("elastic_trainer", elastic_trainer.name)
3839
self.assertEqual("python", elastic_trainer.entrypoint)
3940
self.assertEqual(
@@ -153,7 +154,8 @@ def test_json_serialization_factory(self) -> None:
153154
nnodes="2:4",
154155
rdzv_backend="etcd",
155156
rdzv_id="foobar",
156-
).replicas(3)
157+
num_replicas=3,
158+
)
157159

158160
# this is effectively JSON
159161
elastic_json = asdict(role)

torchx/components/dist.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,12 @@ def ddp(
5555
base_image=base_image,
5656
entrypoint=entrypoint,
5757
resource=resource or specs.NULL_RESOURCE,
58+
num_replicas=nnodes,
5859
script_args=list(script_args),
5960
script_envs=env,
6061
nproc_per_node=nproc_per_node,
6162
nnodes=nnodes,
6263
max_restarts=0,
63-
).replicas(nnodes)
64+
)
6465

6566
return specs.AppDef(name).of(ddp_role)

torchx/pipelines/kfp/test/adapter_test.py

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,22 @@ class KFPSpecsTest(unittest.TestCase):
2828
"""
2929

3030
def _test_app(self) -> api.AppDef:
31-
trainer_role = (
32-
api.Role(
33-
name="trainer",
34-
image="pytorch/torchx:latest",
35-
resource=api.Resource(
36-
cpu=2,
37-
memMB=3000,
38-
gpu=4,
39-
),
40-
port_map={"foo": 1234},
41-
)
42-
.runs(
43-
"main",
44-
"--output-path",
45-
"blah",
46-
FOO="bar",
47-
)
48-
.replicas(1)
31+
trainer_role = api.Role(
32+
name="trainer",
33+
image="pytorch/torchx:latest",
34+
entrypoint="main",
35+
args=["--output-path", "blah"],
36+
env={"FOO": "bar"},
37+
resource=api.Resource(
38+
cpu=2,
39+
memMB=3000,
40+
gpu=4,
41+
),
42+
port_map={"foo": 1234},
43+
num_replicas=1,
4944
)
5045

51-
return api.AppDef("test").of(trainer_role)
46+
return api.AppDef("test", roles=[trainer_role])
5247

5348
def _compile_pipeline(self, pipeline: Callable[[], None]) -> None:
5449
with tempfile.TemporaryDirectory() as tmpdir:

torchx/runner/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def dryrun(
248248
if role.num_replicas <= 0:
249249
raise ValueError(
250250
f"Non-positive replicas for role: {role.name}."
251-
f" Did you forget to call role.replicas(positive_number)?"
251+
f" Did you forget to set role.num_replicas?"
252252
)
253253
sched = self._scheduler(scheduler)
254254
sched._validate(app, scheduler)

torchx/runner/test/api_test.py

Lines changed: 78 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -68,23 +68,24 @@ def test_validate_no_roles(self, _) -> None:
6868
def test_validate_no_resource(self, _) -> None:
6969
runner = Runner("test", schedulers={"default": self.scheduler})
7070
with self.assertRaises(ValueError):
71-
role = Role("no resource", image="no_image").runs("echo", "hello_world")
72-
app = AppDef("no resource").of(role)
71+
role = Role(
72+
"no resource", image="no_image", entrypoint="echo", args=["hello_world"]
73+
)
74+
app = AppDef("no resource", roles=[role])
7375
runner.run(app)
7476

7577
def test_validate_invalid_replicas(self, _) -> None:
7678
runner = Runner("test", schedulers={"default": self.scheduler})
7779
with self.assertRaises(ValueError):
78-
role = (
79-
Role(
80-
"invalid replicas",
81-
image="torch",
82-
resource=Resource(cpu=1, gpu=0, memMB=500),
83-
)
84-
.runs("echo", "hello_world")
85-
.replicas(0)
80+
role = Role(
81+
"invalid replicas",
82+
image="torch",
83+
entrypoint="echo",
84+
args=["hello_world"],
85+
num_replicas=0,
86+
resource=Resource(cpu=1, gpu=0, memMB=500),
8687
)
87-
app = AppDef("invalid replicas").of(role)
88+
app = AppDef("invalid replicas", roles=[role])
8889
runner.run(app)
8990

9091
def test_run(self, _) -> None:
@@ -95,10 +96,14 @@ def test_run(self, _) -> None:
9596
wait_interval=1,
9697
)
9798
self.assertEqual(1, len(session.scheduler_backends()))
98-
role = Role(name="touch", image=self.test_dir, resource=resource.SMALL).runs(
99-
"touch.sh", test_file
99+
role = Role(
100+
name="touch",
101+
image=self.test_dir,
102+
resource=resource.SMALL,
103+
entrypoint="touch.sh",
104+
args=[test_file],
100105
)
101-
app = AppDef("name").of(role)
106+
app = AppDef("name", roles=[role])
102107

103108
app_handle = session.run(app, cfg=self.cfg)
104109
app_status = none_throws(session.wait(app_handle))
@@ -109,20 +114,28 @@ def test_dryrun(self, _) -> None:
109114
session = Runner(
110115
name=SESSION_NAME, schedulers={"default": scheduler_mock}, wait_interval=1
111116
)
112-
role = Role(name="touch", image=self.test_dir, resource=resource.SMALL).runs(
113-
"echo", "hello world"
117+
role = Role(
118+
name="touch",
119+
image=self.test_dir,
120+
resource=resource.SMALL,
121+
entrypoint="echo",
122+
args=["hello world"],
114123
)
115-
app = AppDef("name").of(role)
124+
app = AppDef("name", roles=[role])
116125
session.dryrun(app, "default", cfg=self.cfg)
117126
scheduler_mock.submit_dryrun.assert_called_once_with(app, self.cfg)
118127
scheduler_mock._validate.assert_called_once()
119128

120129
def test_describe(self, _) -> None:
121130
session = Runner(name=SESSION_NAME, schedulers={"default": self.scheduler})
122-
role = Role(name="sleep", image=self.test_dir, resource=resource.SMALL).runs(
123-
"sleep.sh", "60"
131+
role = Role(
132+
name="sleep",
133+
image=self.test_dir,
134+
resource=resource.SMALL,
135+
entrypoint="sleep.sh",
136+
args=["60"],
124137
)
125-
app = AppDef("sleeper").of(role)
138+
app = AppDef("sleeper", roles=[role])
126139

127140
app_handle = session.run(app, cfg=self.cfg)
128141
self.assertEqual(app, session.describe(app_handle))
@@ -133,10 +146,14 @@ def test_list(self, _) -> None:
133146
session = Runner(
134147
name=SESSION_NAME, schedulers={"default": self.scheduler}, wait_interval=1
135148
)
136-
role = Role(name="touch", image=self.test_dir, resource=resource.SMALL).runs(
137-
"sleep.sh", "1"
149+
role = Role(
150+
name="touch",
151+
image=self.test_dir,
152+
resource=resource.SMALL,
153+
entrypoint="sleep.sh",
154+
args=["1"],
138155
)
139-
app = AppDef("sleeper").of(role)
156+
app = AppDef("sleeper", roles=[role])
140157

141158
num_apps = 4
142159

@@ -159,10 +176,14 @@ def test_evict_non_existent_app(self, _) -> None:
159176
name=SESSION_NAME, schedulers={"default": scheduler}, wait_interval=1
160177
)
161178
test_file = os.path.join(self.test_dir, "test_file")
162-
role = Role(name="touch", image=self.test_dir, resource=resource.SMALL).runs(
163-
"touch.sh", test_file
179+
role = Role(
180+
name="touch",
181+
image=self.test_dir,
182+
resource=resource.SMALL,
183+
entrypoint="touch.sh",
184+
args=[test_file],
164185
)
165-
app = AppDef("touch_test_file").of(role)
186+
app = AppDef("touch_test_file", roles=[role])
166187

167188
# local scheduler was setup with a cache size of 1
168189
# run the same app twice (the first will be removed from the scheduler's cache)
@@ -183,10 +204,14 @@ def test_status(self, _) -> None:
183204
session = Runner(
184205
name=SESSION_NAME, schedulers={"default": self.scheduler}, wait_interval=1
185206
)
186-
role = Role(name="sleep", image=self.test_dir, resource=resource.SMALL).runs(
187-
"sleep.sh", "60"
207+
role = Role(
208+
name="sleep",
209+
image=self.test_dir,
210+
resource=resource.SMALL,
211+
entrypoint="sleep.sh",
212+
args=["60"],
188213
)
189-
app = AppDef("sleeper").of(role)
214+
app = AppDef("sleeper", roles=[role])
190215
app_handle = session.run(app, cfg=self.cfg)
191216
app_status = none_throws(session.status(app_handle))
192217
self.assertEqual(AppState.RUNNING, app_status.state)
@@ -213,10 +238,13 @@ def test_status_ui_url(self, json_dumps_mock: MagicMock, _) -> None:
213238
session = Runner(
214239
name="test_ui_url_session", schedulers={"default": mock_scheduler}
215240
)
216-
role = Role("ignored", image=self.test_dir, resource=resource.SMALL).runs(
217-
"/bin/echo"
241+
role = Role(
242+
"ignored",
243+
image=self.test_dir,
244+
resource=resource.SMALL,
245+
entrypoint="/bin/echo",
218246
)
219-
app_handle = session.run(AppDef(app_id).of(role))
247+
app_handle = session.run(AppDef(app_id, roles=[role]))
220248
status = none_throws(session.status(app_handle))
221249
self.assertEquals(resp.ui_url, status.ui_url)
222250

@@ -233,10 +261,13 @@ def test_status_structured_msg(self, json_dumps_mock: MagicMock, _) -> None:
233261
session = Runner(
234262
name="test_structured_msg", schedulers={"default": mock_scheduler}
235263
)
236-
role = Role("ignored", image=self.test_dir, resource=resource.SMALL).runs(
237-
"/bin/echo"
264+
role = Role(
265+
"ignored",
266+
image=self.test_dir,
267+
resource=resource.SMALL,
268+
entrypoint="/bin/echo",
238269
)
239-
app_handle = session.run(AppDef(app_id).of(role))
270+
app_handle = session.run(AppDef(app_id, roles=[role]))
240271
status = none_throws(session.status(app_handle))
241272
self.assertEquals(resp.structured_error_msg, status.structured_error_msg)
242273

@@ -305,10 +336,14 @@ def test_get_schedulers(self, json_dumps_mock: MagicMock, _) -> None:
305336
schedulers = {"default": default_sched_mock, "local": local_sched_mock}
306337
session = Runner(name="test_session", schedulers=schedulers)
307338

308-
role = Role(name="sleep", image=self.test_dir, resource=resource.SMALL).runs(
309-
"sleep.sh", "60"
339+
role = Role(
340+
name="sleep",
341+
image=self.test_dir,
342+
resource=resource.SMALL,
343+
entrypoint="sleep.sh",
344+
args=["60"],
310345
)
311-
app = AppDef("sleeper").of(role)
346+
app = AppDef("sleeper", roles=[role])
312347
cfg = RunConfig()
313348
session.run(app, scheduler="local", cfg=cfg)
314349
local_sched_mock.submit.called_once_with(app, cfg)
@@ -353,8 +388,11 @@ def test_run_from_file(self, _) -> None:
353388
expected_app = AppDef(
354389
"ddp_app",
355390
roles=[
356-
Role("worker", image="dummy_image", resource=Resource(1, 0, 1)).runs(
357-
entrypoint
391+
Role(
392+
"worker",
393+
image="dummy_image",
394+
resource=Resource(1, 0, 1),
395+
entrypoint=entrypoint,
358396
)
359397
],
360398
)

torchx/runner/test/resource/distributed.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,16 @@ def ddp(
3737
if env:
3838
app_env.update(env)
3939
entrypoint = os.path.join(specs.macros.img_root, script)
40-
ddp_role = (
41-
specs.Role(
42-
name=role,
43-
image="dummy_image",
44-
resource=specs.Resource(cpu=1, gpu=0, memMB=1),
45-
)
46-
.runs(entrypoint, *script_args, **app_env)
47-
.replicas(nnodes)
40+
ddp_role = specs.Role(
41+
name=role,
42+
image="dummy_image",
43+
entrypoint=entrypoint,
44+
args=list(script_args),
45+
env=app_env,
46+
num_replicas=nnodes,
47+
resource=specs.Resource(cpu=1, gpu=0, memMB=1),
4848
)
4949

5050
# get app name from cli or extract from fbpkg. Note that fbpkg name can has "."
5151
# but not allowed in app name.
52-
return specs.AppDef(name).of(ddp_role)
52+
return specs.AppDef(name, roles=[ddp_role])

0 commit comments

Comments
 (0)