Skip to content

Commit 0434cf6

Browse files
authored
Add pathways + nap support (#623)
* feat: add pathways + nap support * feat: add support for nap arguments in pathways workload create * style: remove unnecessary comments
1 parent f51613d commit 0434cf6

File tree

5 files changed

+65
-50
lines changed

5 files changed

+65
-50
lines changed

src/xpk/commands/cluster.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,6 @@ def cluster_adapt(args) -> None:
141141
if not tensorboard_config:
142142
xpk_exit(1)
143143

144-
# Provision node pools dynamically based on incoming workloads:
145-
# Currently autoprovisioning is not supported with Pathways.
146144
autoprovisioning_config = None
147145
if args.enable_autoprovisioning:
148146
xpk_print('Enabling Autoprovisioning')
@@ -294,7 +292,7 @@ def cluster_create(args) -> None:
294292
# Provision node pools dynamically based on incoming workloads:
295293
# Currently autoprovisioning is not supported with Pathways.
296294
autoprovisioning_config = None
297-
if not args.enable_pathways and args.enable_autoprovisioning:
295+
if args.enable_autoprovisioning:
298296
xpk_print('Enabling Autoprovisioning')
299297
autoprovisioning_config, return_code = enable_autoprovisioning_on_cluster(
300298
args, system

src/xpk/core/nap.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,6 @@ def is_autoprovisioning_enabled(
269269
bool is true if autoprovisioning is enabled, false otherwise.
270270
int of 0 if successful and 1 otherwise.
271271
"""
272-
# Currently autoprovisioning is not enabled for Pathways workloads. b/360898087
273-
if args.use_pathways:
274-
return False, 0
275272

276273
resources_configmap_name = f'{args.cluster}-{CLUSTER_RESOURCES_CONFIGMAP}'
277274
cluster_config_map = get_cluster_configmap(args, resources_configmap_name)

src/xpk/core/resources.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,7 @@ def create_cluster_configmaps(
108108
device_type = system.device_type
109109
if system.accelerator_type == AcceleratorType['GPU']:
110110
resources_data = f'{device_type}: "{int(args.num_nodes)}"'
111-
elif (
112-
not args.enable_pathways
113-
and args.enable_autoprovisioning
114-
and autoprovisioning_config
115-
):
116-
# Currently autoprovisioning is not supported with Pathways.
117-
# Auto provisioning will have variable topologies for a gke accelerator type.
111+
elif args.enable_autoprovisioning and autoprovisioning_config:
118112
resources_data = (
119113
f'{system.gke_accelerator}: {AUTOPROVISIONING_CONFIG_VALUE}'
120114
)

src/xpk/parser/cluster.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,14 @@ def set_cluster_create_pathways_parser(
208208
cluster_create_pathways_optional_arguments
209209
)
210210

211+
autoprovisioning_arguments = (
212+
cluster_create_pathways_parser.add_argument_group(
213+
'Autoprovisioning Arguments',
214+
'Optional arguments for enabling autoprovisioning.',
215+
)
216+
)
217+
add_autoprovisioning_arguments(autoprovisioning_arguments)
218+
211219
### Capacity arguments specific to "cluster create-pathways"
212220
cluster_create_pathways_capacity_arguments = (
213221
cluster_create_pathways_parser.add_argument_group(

src/xpk/parser/workload.py

Lines changed: 55 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -193,43 +193,6 @@ def set_workload_parsers(workload_parser):
193193
),
194194
)
195195

196-
# Autoprovisioning workload arguments
197-
workload_create_autoprovisioning_arguments.add_argument(
198-
'--on-demand',
199-
action='store_true',
200-
help=(
201-
'Sets autoprovisioning to use on-demand resources for the workload'
202-
' request. See `--reservation` or `--spot` for other capacity types.'
203-
),
204-
)
205-
workload_create_autoprovisioning_arguments.add_argument(
206-
'--reservation',
207-
type=str,
208-
help=(
209-
'Sets autoprovisioning to use reservation resources for the workload'
210-
' request. This will attempt to find the provided reservation. See'
211-
' `--spot`, `--flex` or `--on-demand` for other capacity types.'
212-
),
213-
)
214-
workload_create_autoprovisioning_arguments.add_argument(
215-
'--spot',
216-
action='store_true',
217-
help=(
218-
'Sets autoprovisioning to use spot resources. See `--reservation`,'
219-
' `--flex` or `--on-demand` for other capacity types.'
220-
),
221-
)
222-
223-
workload_create_autoprovisioning_arguments.add_argument(
224-
'--flex',
225-
action='store_true',
226-
help=(
227-
'Sets autoprovisioning to use flex-start resources. See'
228-
' `--reservation`, `--spot` or `--on-demand` for other capacity'
229-
' types.'
230-
),
231-
)
232-
233196
# "workload create-pathways" command parser.
234197
workload_create_pathways_parser = workload_subcommands.add_parser(
235198
'create-pathways', help='Create a new job.'
@@ -257,6 +220,12 @@ def set_workload_parsers(workload_parser):
257220
'`--base-docker-image` is used by default. Set this argument if the'
258221
' user wants the docker image to be used directly by the xpk workload.',
259222
)
223+
workload_create_pathways_autoprovisioning_arguments = (
224+
workload_create_pathways_parser.add_argument_group(
225+
'Optional Autoprovisioning Arguments',
226+
'Arguments for configuring autoprovisioning.',
227+
)
228+
)
260229
workload_create_pathways_vertex_tensorboard_arguments = (
261230
workload_create_pathways_parser.add_argument_group(
262231
'Vertex Tensorboard Arguments',
@@ -407,6 +376,10 @@ def set_workload_parsers(workload_parser):
407376
workload_vertex_tensorboard_arguments,
408377
workload_create_pathways_vertex_tensorboard_arguments,
409378
])
379+
add_shared_workload_create_autoprovisioning_arguments([
380+
workload_create_autoprovisioning_arguments,
381+
workload_create_pathways_autoprovisioning_arguments,
382+
])
410383

411384
# Set defaults for both workload create and workload create-pathways after adding all shared args.
412385
workload_create_parser.set_defaults(func=workload_create)
@@ -770,3 +743,48 @@ def add_shared_workload_create_tensorboard_arguments(args_parsers):
770743
'<cluster>-<workload> will be created.'
771744
),
772745
)
746+
747+
748+
def add_shared_workload_create_autoprovisioning_arguments(args_parsers):
  """Add the shared autoprovisioning capacity flags to each given parser.

  Registers `--on-demand`, `--reservation`, `--spot` and `--flex` on every
  entry of `args_parsers`, so all of them expose identical flags and help.

  Args:
    args_parsers: Iterable of parsers or argument groups (any object with an
      argparse-style `add_argument` method) that should receive the flags.
  """
  for custom_parser in args_parsers:
    custom_parser.add_argument(
        '--on-demand',
        action='store_true',
        help=(
            # Lists every sibling capacity flag, matching the other three
            # help texts (the previous wording omitted `--flex`).
            'Sets autoprovisioning to use on-demand resources for the workload'
            ' request. See `--reservation`, `--spot` or `--flex` for other'
            ' capacity types.'
        ),
    )
    custom_parser.add_argument(
        '--reservation',
        type=str,
        help=(
            'Sets autoprovisioning to use reservation resources for the'
            ' workload request. This will attempt to find the provided'
            ' reservation. See `--spot`, `--flex` or `--on-demand` for other'
            ' capacity types.'
        ),
    )
    custom_parser.add_argument(
        '--spot',
        action='store_true',
        help=(
            'Sets autoprovisioning to use spot resources. See `--reservation`,'
            ' `--flex` or `--on-demand` for other capacity types.'
        ),
    )
    custom_parser.add_argument(
        '--flex',
        action='store_true',
        help=(
            'Sets autoprovisioning to use flex-start resources. See'
            ' `--reservation`, `--spot` or `--on-demand` for other capacity'
            ' types.'
        ),
    )

0 commit comments

Comments
 (0)