Skip to content

Commit d5e7c7f

Browse files
committed
Merge branch 'develop' of https://github.com/AI-Hypercomputer/xpk into develop
2 parents 1f895d9 + 133f2a4 commit d5e7c7f

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

src/xpk/core/capacity.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ def verify_reservation_exists(args) -> int:
173173

174174

175175
def get_capacity_arguments_from_capacity_type(
176-
args, capacity_type: CapacityType
176+
args, capacity_type: CapacityType, max_nodes: int
177177
) -> tuple[str, int]:
178178
"""Determine the Nodepool creation capacity arguments needed.
179179
@@ -197,7 +197,7 @@ def get_capacity_arguments_from_capacity_type(
197197
capacity_args = (
198198
' --flex-start --enable-queued-provisioning --enable-autoscaling'
199199
' --location-policy=ANY --reservation-affinity=none'
200-
' --no-enable-autorepair --max-nodes=1'
200+
f' --no-enable-autorepair --max-nodes={max_nodes}'
201201
)
202202
case CapacityType.RESERVATION:
203203
capacity_args = (

src/xpk/core/kjob.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,6 @@ def create_pod_template_instance(args: Namespace, service_account: str) -> int:
346346
if pod_image is None or len(pod_image) == 0:
347347
pod_image = PodTemplateDefaults.IMAGE.value
348348
working_directory = config.get(KJOB_SHELL_WORKING_DIRECTORY)
349-
350349
if working_directory is None or len(working_directory) == 0:
351350
working_directory = PodTemplateDefaults.WORKING_DIRECTORY.value
352351

src/xpk/core/nodepool.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,8 +77,12 @@ def run_gke_node_pool_create_command(
7777
if return_code > 0:
7878
xpk_print('Listing all reservations failed!')
7979
return_code = 1
80+
if system.accelerator_type == AcceleratorType['TPU']:
81+
max_nodes = system.vms_per_slice
82+
else:
83+
max_nodes = 1000
8084
capacity_args, return_code = get_capacity_arguments_from_capacity_type(
81-
args, capacity_type
85+
args, capacity_type, max_nodes
8286
)
8387
if return_code > 0:
8488
xpk_print('Parsing capacity arguments failed!')

0 commit comments

Comments
 (0)