Skip to content

Commit c242687

Browse files
committed
Added unit test
1 parent 85f228d commit c242687

File tree

3 files changed

+26
-95
lines changed

3 files changed

+26
-95
lines changed

src/xpk/commands/cluster.py

Lines changed: 3 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,14 +1464,16 @@ def install_diagon_prerequisites():
14641464
"""
14651465
deployment_name = 'kueue-controller-manager'
14661466
namespace_name = 'kueue-system'
1467+
cert_webhook_deployment_name = 'cert-manager-webhook'
1468+
cert_webhook_namespace_name = 'cert-manager'
14671469
# is_running = wait_for_cluster_running(args)
14681470
is_running = wait_for_deployment_ready(deployment_name, namespace_name)
14691471
if is_running:
14701472
return_code = install_cert_manager()
14711473
if return_code != 0:
14721474
return return_code
14731475

1474-
cert_webhook_ready = check_cert_manager_webhook_status()
1476+
cert_webhook_ready = wait_for_deployment_ready(cert_webhook_deployment_name, cert_webhook_namespace_name)
14751477
if cert_webhook_ready:
14761478

14771479
webhook_package = "mldiagnostics-injection-webhook"
@@ -1554,96 +1556,3 @@ def wait_for_deployment_ready(deployment_name: str, namespace: str, timeout_seco
15541556
xpk_print(f"\nUnexpected API request error while checking deployment status: {e}")
15551557
time.sleep(10)
15561558
return False
1557-
1558-
1559-
def wait_for_cluster_running(args, timeout_minutes: int = 30) -> bool:
  """Poll the GKE cluster status via the gcloud CLI until it is RUNNING.

  Args:
    args: user provided arguments for running the command. The `cluster`,
      `zone` and `project` attributes are read from it.
    timeout_minutes: Timeout duration in minutes.

  Returns:
    bool: True if the cluster reports RUNNING before the timeout. False if
      gcloud reports the cluster as not found, the status contains ERROR or
      DEGRADED, or the timeout elapses.
  """
  timeout_seconds = timeout_minutes * 60
  start_time = time.time()

  # `value(status)` makes gcloud print only the status field of the cluster.
  command = (
      'gcloud container clusters describe'
      f' {args.cluster} --region={zone_to_region(args.zone)} --project={args.project}'
      " --format='value(status)'"
  )

  # Use xpk_print like every other message in this function.
  xpk_print(
      f"Waiting for cluster {args.cluster} ({args.zone}) to enter RUNNING state (using gcloud CLI)..."
  )

  while time.time() - start_time < timeout_seconds:
    # Keep the try body limited to the call that can actually raise.
    try:
      return_code, return_output = run_command_for_value(
          command, 'Get the status of cluster...'
      )
    except Exception as e:  # best-effort polling: log, back off, retry
      xpk_print(f"\nUnexpected API request error: {e}")
      time.sleep(10)
      continue

    if return_code != 0:
      # A "Not found" failure is terminal: the cluster does not exist.
      if "Not found" in return_output:
        xpk_print(f"\nError: Cluster {args.cluster} does not exist in {args.zone}.")
        return False

      # Any other gcloud failure is treated as transient; wait and retry.
      xpk_print(f"\nError: gcloud command failed. {return_output}")
      time.sleep(10)
      continue

    if "RUNNING" in return_output:
      xpk_print(f"Success: Cluster {args.cluster} status is RUNNING.")
      return True

    if "ERROR" in return_output or "DEGRADED" in return_output:
      xpk_print(f"Error: Cluster status is {return_output}, creation failed.")
      return False

    elapsed_time = int(time.time() - start_time)
    xpk_print(f"Current status: {return_output}. Elapsed time: {elapsed_time} seconds. Checking again...")

    # Poll interval between two successful status reads.
    time.sleep(30)

  xpk_print(f"\nTimeout Error: Cluster did not reach RUNNING state within {timeout_minutes} minutes.")
  return False
1620-
1621-
def check_cert_manager_webhook_status(timeout_seconds: int = 300) -> bool:
  """Check that the cert-manager-webhook deployment has finished rolling out.

  Runs `kubectl rollout status` for deployment/cert-manager-webhook in the
  cert-manager namespace and inspects both the exit code and the output.

  Args:
    timeout_seconds (int): The maximum time to wait for the rollout to complete.

  Returns:
    bool: True if the command exits with code 0 and its output contains
      "successfully rolled out", False otherwise (including when running the
      command raises).
  """
  kubectl_command = (
      'kubectl rollout status deployment/cert-manager-webhook -n cert-manager'
      f' --timeout={timeout_seconds}s'
  )

  xpk_print(f"Running command to check deployment status: {kubectl_command}")

  # Keep the try body limited to the call that can actually raise.
  try:
    return_code, return_output = run_command_for_value(
        kubectl_command, 'check cert manager...'
    )
  except Exception as e:  # best-effort check: report and treat as not ready
    xpk_print(f"\nUnexpected error during kubectl execution: {e}")
    return False

  # Honor the exit code as well as the success message in the output.
  if return_code == 0 and "successfully rolled out" in return_output:
    xpk_print("SUCCESS: Deployment cert-manager-webhook rollout completed.")
    return True

  # Surface the failure instead of returning silently.
  xpk_print(
      "Deployment cert-manager-webhook rollout did not complete"
      f" (exit code {return_code}): {return_output}"
  )
  return False

src/xpk/commands/cluster_test.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from unittest.mock import MagicMock, patch
2121
import pytest
2222

23-
from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command
23+
from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command, install_diagon_prerequisites
2424
from xpk.core.system_characteristics import SystemCharacteristics, UserFacingNameToSystemCharacteristics
2525
from xpk.core.testing.commands_tester import CommandsTester
2626
from xpk.utils.feature_flags import FeatureFlags
@@ -87,6 +87,7 @@ def construct_args(**kwargs: Any) -> Namespace:
8787
memory_limit='100Gi',
8888
cpu_limit=100,
8989
cluster_cpu_machine_type='',
90+
managed_mldiagnostics=False,
9091
)
9192
args_dict.update(kwargs)
9293
return Namespace(**args_dict)
@@ -247,3 +248,13 @@ def test_run_gke_cluster_create_command_with_gke_version_has_no_autoupgrade_flag
247248
mocks.commands_tester.assert_command_run(
248249
'clusters create', ' --no-enable-autoupgrade'
249250
)
251+
252+
253+
def test_cluster_create_args_for_install_diagon_prerequisites(mocks: _Mocks):
  """Call install_diagon_prerequisites and check common_print_mock is unused."""
  install_diagon_prerequisites()

  print_call_total = mocks.common_print_mock.call_count
  assert print_call_total == 0
260+

src/xpk/parser/cluster_test.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,14 @@ def test_cluster_create_sub_slicing_can_be_set():
6464
)
6565

6666
assert args.sub_slicing is True
67+
68+
69+
def test_cluster_create_managed_mldiagnostics():
  """Parsing --managed-mldiagnostics sets args.managed_mldiagnostics to True."""
  cluster_create_parser = argparse.ArgumentParser()
  set_cluster_create_parser(cluster_create_parser)

  cli_tokens = ["--cluster", "test-cluster", "--tpu-type", "v5p-8", "--managed-mldiagnostics"]
  parsed = cluster_create_parser.parse_args(cli_tokens)

  assert parsed.managed_mldiagnostics is True

0 commit comments

Comments
 (0)