Skip to content

Commit 873f509

Browse files
Kiuk Chungfacebook-github-bot
authored andcommitted
(torchx/runner) standardize class and param naming around runopts and runcfg (#252)
Summary: Pull Request resolved: #252 closes: #250 I've done a few things on this diff: 1. Renamed `torchx.specs.api.Runopt` to `torchx.specs.runopt` (for consistency with `runopts`) 2. Renamed variables (where I could) `runcfg` to `cfg` (to be consistent with the scheduler and runner apis) 3. Renamed the config section to `[$profile.$sched.cfg]` instead of `[$profile.scheduler_args.$sched]` 4. Changed the torchx run cli's `-a` (short for `--scheduler_args`) to `-cfg` for consistency with the rest of the system. Reviewed By: aivanou Differential Revision: D31656766 fbshipit-source-id: 8c009852d5807010ac4cd33902b294cff4bd0ec1
1 parent fcf19f8 commit 873f509

File tree

6 files changed

+119
-108
lines changed

6 files changed

+119
-108
lines changed

torchx/cli/cmd_run.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@
1010
import sys
1111
from dataclasses import asdict
1212
from pprint import pformat
13-
from typing import Dict, List, cast, Type, Optional
13+
from typing import Dict, List, Optional, Type, cast
1414

1515
import torchx.specs as specs
1616
from pyre_extensions import none_throws
1717
from torchx.cli.cmd_base import SubCommand
1818
from torchx.runner import Runner, get_runner
19-
from torchx.schedulers import get_scheduler_factories, get_default_scheduler_name
19+
from torchx.schedulers import get_default_scheduler_name, get_scheduler_factories
2020
from torchx.specs.finder import (
21+
ComponentNotFoundException,
22+
ComponentValidationException,
2123
_Component,
2224
get_components,
23-
ComponentValidationException,
24-
ComponentNotFoundException,
2525
)
2626
from torchx.util.types import to_dict
2727

@@ -41,13 +41,13 @@ def _convert_to_option_type(
4141
return option_type(value)
4242

4343

44-
def _parse_run_config(arg: str, scheduler_run_opts: specs.runopts) -> specs.RunConfig:
44+
def _parse_run_config(arg: str, scheduler_opts: specs.runopts) -> specs.RunConfig:
4545
conf = specs.RunConfig()
4646
if not arg:
4747
return conf
4848

4949
for key, value in to_dict(arg).items():
50-
option = scheduler_run_opts.get(key)
50+
option = scheduler_opts.get(key)
5151
if option is None:
5252
raise ValueError(f"Unknown {key}, run `torchx runopts` for more info")
5353
option_type = option.opt_type
@@ -86,7 +86,7 @@ def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
8686
default=get_default_scheduler_name(),
8787
)
8888
subparser.add_argument(
89-
"-a",
89+
"-cfg",
9090
"--scheduler_args",
9191
type=str,
9292
help="Arguments to pass to the scheduler (Ex:`cluster=foo,user=bar`)."

torchx/runner/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def dryrun(
263263

264264
cfg = cfg or RunConfig()
265265
# TODO enable profiles - https://github.com/pytorch/torchx/issues/248
266-
config.apply(profile="default", scheduler=scheduler, runcfg=cfg)
266+
config.apply(scheduler=scheduler, cfg=cfg, profile="default")
267267

268268
sched = self._scheduler(scheduler)
269269
sched._validate(app, scheduler)

torchx/runner/config.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from torchx.schedulers import Scheduler, get_schedulers
1414
from torchx.specs import RunConfig, get_type_name
15+
from torchx.specs.api import runopt
1516

1617

1718
_NONE = "None"
@@ -47,22 +48,36 @@ def _get_scheduler(name: str) -> Scheduler:
4748
return sched
4849

4950

51+
def _fixme_placeholder(runopt: runopt, max_len: int = 60) -> str:
52+
ph = f"#FIXME:({get_type_name(runopt.opt_type)}) {runopt.help}"
53+
return ph if len(ph) <= max_len else f"{ph[:max_len]}..."
54+
55+
5056
def dump(
5157
f: TextIO, schedulers: Optional[List[str]] = None, required_only: bool = False
5258
) -> None:
5359
"""
54-
Dumps a default INI-style config template containing the runopts for the
55-
given scheduler names into ``f``. If no ``schedulers`` are specified
56-
dumps all known registered schedulers.
60+
Dumps a default INI-style config template containing the :py:class:torchx.specs.runopts for the
61+
given scheduler names into the file-like object specified by ``f``.
62+
If no ``schedulers`` are specified dumps all known registered schedulers.
5763
5864
Optional runopts are pre-filled with their default values.
59-
Required runopts are set with a ``<FIXME_...>`` placeholder.
65+
Required runopts are set with a ``FIXME: ...`` placeholder.
66+
To only dump required runopts pass ``required_only=True``.
67+
6068
Each scheduler's runopts are written in the section called
61-
``[default.scheduler_args.{scheduler_name}]`` (e.g. ``[default.scheduler_args.kubernetes]``)
69+
``[default.{scheduler_name}.cfg]``.
6270
63-
To only dump required runopts pass ``required_only=True``.
71+
For example:
72+
73+
::
6474
65-
Raises a ``ValueError`` if given a scheduler name that is not known
75+
[default.kubernetes.cfg]
76+
namespace = default
77+
queue = #FIXME (str)Volcano queue to schedule job in
78+
79+
Raises:
80+
``ValueError`` - if given a scheduler name that is not known
6681
"""
6782

6883
if schedulers:
@@ -74,12 +89,12 @@ def dump(
7489
for sched_name in scheds:
7590
sched = _get_scheduler(sched_name)
7691

77-
section = f"default.scheduler_args.{sched_name}"
92+
section = f"default.{sched_name}.cfg"
7893
config.add_section(section)
7994

8095
for opt_name, opt in sched.run_opts():
8196
if opt.is_required:
82-
val = f"<FIXME_WITH_A_{get_type_name(opt.opt_type)}_VALUE>"
97+
val = _fixme_placeholder(opt)
8398
else: # not required runopts MUST have a default
8499
if required_only:
85100
continue
@@ -96,11 +111,10 @@ def dump(
96111
val = f"{opt.default}"
97112

98113
config.set(section, opt_name, val)
99-
100114
config.write(f, space_around_delimiters=True)
101115

102116

103-
def apply(profile: str, scheduler: str, runcfg: RunConfig) -> None:
117+
def apply(scheduler: str, cfg: RunConfig, profile: str = "default") -> None:
104118
"""
105119
Loads .torchxconfig files from predefined locations according
106120
to a load hierarchy and applies the loaded configs into the
@@ -121,10 +135,10 @@ def apply(profile: str, scheduler: str, runcfg: RunConfig) -> None:
121135
if configfile.exists():
122136
log.info(f"loading configs from {configfile}")
123137
with open(str(configfile), "r") as f:
124-
load(profile, scheduler, f, runcfg)
138+
load(scheduler, f, cfg, profile)
125139

126140

127-
def load(profile: str, scheduler: str, f: TextIO, runcfg: RunConfig) -> None:
141+
def load(scheduler: str, f: TextIO, cfg: RunConfig, profile: str = "default") -> None:
128142
"""
129143
loads the section ``[{profile}.scheduler_args.{scheduler}]`` from the given
130144
configfile ``f`` (in .INI format) into the provided ``runcfg``, only adding
@@ -137,17 +151,17 @@ def load(profile: str, scheduler: str, f: TextIO, runcfg: RunConfig) -> None:
137151

138152
runopts = _get_scheduler(scheduler).run_opts()
139153

140-
section = f"{profile}.scheduler_args.{scheduler}"
154+
section = f"{profile}.{scheduler}.cfg"
141155
if config.has_section(section):
142156
for name, value in config.items(section):
143-
if name in runcfg.cfgs:
157+
if name in cfg.cfgs:
144158
# DO NOT OVERRIDE existing configs
145159
continue
146160

147161
if value == _NONE:
148162
# should map to None (not str 'None')
149163
# this also handles empty or None lists
150-
runcfg.set(name, None)
164+
cfg.set(name, None)
151165
else:
152166
runopt = runopts.get(name)
153167

@@ -161,9 +175,9 @@ def load(profile: str, scheduler: str, f: TextIO, runcfg: RunConfig) -> None:
161175
if runopt.opt_type is bool:
162176
# need to handle bool specially since str -> bool is based on
163177
# str emptiness not value (e.g. bool("False") == True)
164-
runcfg.set(name, config.getboolean(section, name))
178+
cfg.set(name, config.getboolean(section, name))
165179
elif runopt.opt_type is List[str]:
166-
runcfg.set(name, value.split(";"))
180+
cfg.set(name, value.split(";"))
167181
else:
168182
# pyre-ignore[29]
169-
runcfg.set(name, runopt.opt_type(value))
183+
cfg.set(name, runopt.opt_type(value))

torchx/runner/test/config_test.py

Lines changed: 49 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -102,30 +102,30 @@ def run_opts(self) -> runopts:
102102
return opts
103103

104104

105-
_CONFIG = """[default.scheduler_args.local_cwd]
105+
_CONFIG = """[default.local_cwd.cfg]
106106
log_dir = /home/bob/logs
107107
prepend_cwd = True
108108
109-
[test.scheduler_args.local_cwd]
109+
[test.local_cwd.cfg]
110110
log_dir = None
111111
prepend_cwd = False
112112
113-
[alpha.scheduler_args.local_cwd]
113+
[alpha.local_cwd.cfg]
114114
log_dir = /tmp/logs
115115
"""
116116

117-
_CONFIG_INVALID = """[default.scheduler_args.test]
117+
_CONFIG_INVALID = """[default.test.cfg]
118118
a_run_opt_that = does_not_exist
119119
s = option_that_exists
120120
"""
121121

122-
_TEAM_CONFIG = """[default.scheduler_args.test]
122+
_TEAM_CONFIG = """[default.test.cfg]
123123
s = team_default
124124
i = 50
125125
f = 1.2
126126
"""
127127

128-
_MY_CONFIG = """[default.scheduler_args.test]
128+
_MY_CONFIG = """[default.test.cfg]
129129
s = my_default
130130
i = 100
131131
"""
@@ -158,32 +158,30 @@ def _write(self, filename: str, content: str) -> Path:
158158
return f
159159

160160
def test_load(self) -> None:
161-
runcfg = RunConfig()
162-
load(
163-
profile="default", scheduler="local_cwd", f=StringIO(_CONFIG), runcfg=runcfg
164-
)
165-
self.assertEqual("/home/bob/logs", runcfg.get("log_dir"))
166-
self.assertEqual(True, runcfg.get("prepend_cwd"))
161+
cfg = RunConfig()
162+
load(profile="default", scheduler="local_cwd", f=StringIO(_CONFIG), cfg=cfg)
163+
self.assertEqual("/home/bob/logs", cfg.get("log_dir"))
164+
self.assertEqual(True, cfg.get("prepend_cwd"))
167165

168-
runcfg = RunConfig()
169-
load(profile="test", scheduler="local_cwd", f=StringIO(_CONFIG), runcfg=runcfg)
170-
self.assertEqual(None, runcfg.get("log_dir"))
171-
self.assertEqual(False, runcfg.get("prepend_cwd"))
166+
cfg = RunConfig()
167+
load(profile="test", scheduler="local_cwd", f=StringIO(_CONFIG), cfg=cfg)
168+
self.assertEqual(None, cfg.get("log_dir"))
169+
self.assertEqual(False, cfg.get("prepend_cwd"))
172170

173-
runcfg = RunConfig()
174-
load(profile="alpha", scheduler="local_cwd", f=StringIO(_CONFIG), runcfg=runcfg)
175-
self.assertEqual("/tmp/logs", runcfg.get("log_dir"))
176-
self.assertEqual(None, runcfg.get("prepend_cwd"))
171+
cfg = RunConfig()
172+
load(profile="alpha", scheduler="local_cwd", f=StringIO(_CONFIG), cfg=cfg)
173+
self.assertEqual("/tmp/logs", cfg.get("log_dir"))
174+
self.assertEqual(None, cfg.get("prepend_cwd"))
177175

178176
def test_no_override_load(self) -> None:
179-
runcfg = RunConfig()
180-
runcfg.set("log_dir", "/foo/bar")
181-
runcfg.set("debug", 1)
177+
cfg = RunConfig()
178+
cfg.set("log_dir", "/foo/bar")
179+
cfg.set("debug", 1)
182180

183-
load(profile="test", scheduler="local_cwd", f=StringIO(_CONFIG), runcfg=runcfg)
184-
self.assertEqual("/foo/bar", runcfg.get("log_dir"))
185-
self.assertEqual(1, runcfg.get("debug"))
186-
self.assertEqual(False, runcfg.get("prepend_cwd"))
181+
load(profile="test", scheduler="local_cwd", f=StringIO(_CONFIG), cfg=cfg)
182+
self.assertEqual("/foo/bar", cfg.get("log_dir"))
183+
self.assertEqual(1, cfg.get("debug"))
184+
self.assertEqual(False, cfg.get("prepend_cwd"))
187185

188186
@patch(
189187
TORCHX_GET_SCHEDULERS,
@@ -192,14 +190,14 @@ def test_no_override_load(self) -> None:
192190
def test_apply(self, _) -> None:
193191
with patch(PATH_CWD, return_value=Path(self.test_dir)):
194192
with patch(PATH_HOME, return_value=Path(self.test_dir) / "home" / "bob"):
195-
runcfg = RunConfig()
196-
runcfg.set("s", "runtime_value")
193+
cfg = RunConfig()
194+
cfg.set("s", "runtime_value")
197195

198-
apply(profile="default", scheduler="test", runcfg=runcfg)
196+
apply(profile="default", scheduler="test", cfg=cfg)
199197

200-
self.assertEqual("runtime_value", runcfg.get("s"))
201-
self.assertEqual(100, runcfg.get("i"))
202-
self.assertEqual(1.2, runcfg.get("f"))
198+
self.assertEqual("runtime_value", cfg.get("s"))
199+
self.assertEqual(100, cfg.get("i"))
200+
self.assertEqual(1.2, cfg.get("f"))
203201

204202
def test_dump_invalid_scheduler(self) -> None:
205203
with self.assertRaises(ValueError):
@@ -215,50 +213,50 @@ def test_dump_only_required(self, _) -> None:
215213
# test scheduler has no required options hence expect empty string
216214
dump(f=sfile, required_only=True)
217215

218-
runcfg = RunConfig()
216+
cfg = RunConfig()
219217
sfile.seek(0)
220-
load(profile="default", scheduler="test", f=sfile, runcfg=runcfg)
218+
load(profile="default", scheduler="test", f=sfile, cfg=cfg)
221219

222-
self.assertFalse(runcfg.cfgs)
220+
self.assertFalse(cfg.cfgs)
223221

224222
@patch(
225223
TORCHX_GET_SCHEDULERS,
226224
return_value={"test": TestScheduler()},
227225
)
228226
def test_load_invalid_runopt(self, _) -> None:
229-
runcfg = RunConfig()
227+
cfg = RunConfig()
230228
load(
231229
profile="default",
232230
scheduler="test",
233231
f=StringIO(_CONFIG_INVALID),
234-
runcfg=runcfg,
232+
cfg=cfg,
235233
)
236234
# options in the config file but not in runopts
237235
# should be ignored (we shouldn't throw an error since
238236
# this makes things super hard to guarantee BC - stale config file will fail
239237
# to run, we don't want that)
240238

241-
self.assertEquals("option_that_exists", runcfg.get("s"))
239+
self.assertEquals("option_that_exists", cfg.get("s"))
242240

243241
def test_load_no_section(self) -> None:
244-
runcfg = RunConfig()
242+
cfg = RunConfig()
245243
load(
246244
profile="default",
247245
scheduler="local_cwd",
248246
f=StringIO(),
249-
runcfg=runcfg,
247+
cfg=cfg,
250248
)
251249
# is empty
252-
self.assertFalse(runcfg.cfgs)
250+
self.assertFalse(cfg.cfgs)
253251

254252
load(
255253
profile="default",
256254
scheduler="local_cwd",
257255
f=StringIO("[default.scheduler_args.local_cwd]\n"),
258-
runcfg=runcfg,
256+
cfg=cfg,
259257
)
260258
# still empty
261-
self.assertFalse(runcfg.cfgs)
259+
self.assertFalse(cfg.cfgs)
262260

263261
@patch(
264262
TORCHX_GET_SCHEDULERS,
@@ -270,12 +268,12 @@ def test_dump_and_load_all_runopt_types(self, _) -> None:
270268

271269
sfile.seek(0)
272270

273-
runcfg = RunConfig()
274-
load(profile="default", scheduler="test", f=sfile, runcfg=runcfg)
271+
cfg = RunConfig()
272+
load(profile="default", scheduler="test", f=sfile, cfg=cfg)
275273

276274
# all runopts in the TestScheduler have defaults, just check against those
277275
for opt_name, opt in TestScheduler().run_opts():
278-
self.assertEqual(runcfg.get(opt_name), opt.default)
276+
self.assertEqual(cfg.get(opt_name), opt.default)
279277

280278
def test_dump_and_load_all_registered_schedulers(self) -> None:
281279
# dump all the runopts for all registered schedulers
@@ -284,11 +282,11 @@ def test_dump_and_load_all_registered_schedulers(self) -> None:
284282

285283
sfile = StringIO()
286284
dump(sfile)
287-
285+
print(sfile.getvalue())
288286
for sched_name, sched in get_schedulers(session_name="_").items():
289287
sfile.seek(0) # reset the file pos
290-
runcfg = RunConfig()
291-
load(profile="default", scheduler=sched_name, f=sfile, runcfg=runcfg)
288+
cfg = RunConfig()
289+
load(profile="default", scheduler=sched_name, f=sfile, cfg=cfg)
292290

293291
for opt_name, _ in sched.run_opts():
294-
self.assertTrue(opt_name in runcfg.cfgs)
292+
self.assertTrue(opt_name in cfg.cfgs)

0 commit comments

Comments
 (0)