Skip to content

Commit c0e3da2

Browse files
committed
Added unit test
1 parent a7167ba commit c0e3da2

File tree

3 files changed

+26
-95
lines changed

3 files changed

+26
-95
lines changed

src/xpk/commands/cluster.py

Lines changed: 3 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,14 +1463,16 @@ def install_diagon_prerequisites():
14631463
"""
14641464
deployment_name = 'kueue-controller-manager'
14651465
namespace_name = 'kueue-system'
1466+
cert_webhook_deployment_name = 'cert-manager-webhook'
1467+
cert_webhook_namespace_name = 'cert-manager'
14661468
# is_running = wait_for_cluster_running(args)
14671469
is_running = wait_for_deployment_ready(deployment_name, namespace_name)
14681470
if is_running:
14691471
return_code = install_cert_manager()
14701472
if return_code != 0:
14711473
return return_code
14721474

1473-
cert_webhook_ready = check_cert_manager_webhook_status()
1475+
cert_webhook_ready = wait_for_deployment_ready(cert_webhook_deployment_name, cert_webhook_namespace_name)
14741476
if cert_webhook_ready:
14751477

14761478
webhook_package = "mldiagnostics-injection-webhook"
@@ -1553,96 +1555,3 @@ def wait_for_deployment_ready(deployment_name: str, namespace: str, timeout_seco
15531555
xpk_print(f"\nUnexpected API request error while checking deployment status: {e}")
15541556
time.sleep(10)
15551557
return False
1556-
1557-
1558-
def wait_for_cluster_running(args, timeout_minutes: int = 30) -> bool:
  """Poll the GKE cluster status via the gcloud CLI until it is RUNNING.

  The cluster is described with `gcloud container clusters describe` and the
  returned status text is inspected on every iteration.

  Args:
    args: user provided arguments for running the command. The `cluster`,
      `zone` and `project` attributes are read.
    timeout_minutes: maximum time to keep polling, in minutes.

  Returns:
    True if the reported status contains RUNNING before the timeout.
    False if the cluster is reported as not found, the status contains ERROR
    or DEGRADED, or the timeout elapses.
  """
  timeout_seconds = timeout_minutes * 60
  # A monotonic clock keeps the deadline correct even if the system wall
  # clock is adjusted while we are waiting.
  start_time = time.monotonic()

  # Construct gcloud command to describe the cluster status
  command = (
      'gcloud container clusters describe'
      f' {args.cluster} --region={zone_to_region(args.zone)}'
      f' --project={args.project}'
      " --format='value(status)'"
  )

  # Use xpk_print like every other message emitted by this function.
  xpk_print(
      f"Waiting for cluster {args.cluster} ({args.zone}) to enter RUNNING"
      " state (using gcloud CLI)..."
  )

  while time.monotonic() - start_time < timeout_seconds:
    try:
      return_code, return_output = run_command_for_value(
          command, 'Get the status of cluster...'
      )

      # The gcloud command itself failed.
      if return_code != 0:
        # A "Not found" error means the cluster does not exist; retrying
        # cannot help, so give up immediately.
        if "Not found" in return_output:
          xpk_print(
              f"\nError: Cluster {args.cluster} does not exist in"
              f" {args.zone}."
          )
          return False

        # Other execution errors: wait briefly and retry, skipping the
        # regular poll interval below.
        xpk_print(f"\nError: gcloud command failed. {return_output}")
        time.sleep(10)
        continue

      # Inspect the cluster status returned by gcloud.
      if "RUNNING" in return_output:
        xpk_print(f"Success: Cluster {args.cluster} status is RUNNING.")
        return True

      if "ERROR" in return_output or "DEGRADED" in return_output:
        xpk_print(
            f"Error: Cluster status is {return_output}, creation failed."
        )
        return False

      elapsed_time = int(time.monotonic() - start_time)
      xpk_print(
          f"Current status: {return_output}. Elapsed time: {elapsed_time}"
          " seconds. Checking again..."
      )
    except Exception as e:  # deliberate best-effort: log and keep polling
      xpk_print(f"\nUnexpected API request error: {e}")
      # Extra delay on unexpected errors, on top of the poll interval.
      time.sleep(10)

    # Poll interval
    time.sleep(30)

  xpk_print(
      "\nTimeout Error: Cluster did not reach RUNNING state within"
      f" {timeout_minutes} minutes."
  )
  return False
1619-
1620-
def check_cert_manager_webhook_status(timeout_seconds: int = 300) -> bool:
  """Check that the cert-manager-webhook deployment has finished rolling out.

  Runs `kubectl rollout status` for deployment/cert-manager-webhook in the
  cert-manager namespace and inspects both the exit code and the output.

  Args:
    timeout_seconds (int): value passed to kubectl's `--timeout` flag, in
      seconds.

  Returns:
    bool: True only if the command exits with code 0 and its output contains
    "successfully rolled out". False in every other case, including when
    running the command raises an exception.
  """
  # Build the kubectl command
  kubectl_command = (
      'kubectl rollout status deployment/cert-manager-webhook -n cert-manager'
      f' --timeout={timeout_seconds}s'
  )

  xpk_print(f"Running command to check deployment status: {kubectl_command}")

  try:
    return_code, return_output = run_command_for_value(
        kubectl_command, 'check cert manager...'
    )
  except Exception as e:  # deliberate best-effort: report and signal failure
    xpk_print(f"\nUnexpected error during kubectl execution: {e}")
    return False

  # Honour the documented contract: the exit code must be 0, not just the
  # success text being present in the output.
  if return_code == 0 and "successfully rolled out" in return_output:
    xpk_print("SUCCESS: Deployment cert-manager-webhook rollout completed.")
    return True

  # Make the failure visible instead of returning silently.
  xpk_print(
      "ERROR: Deployment cert-manager-webhook rollout did not complete"
      f" (exit code {return_code}). {return_output}"
  )
  return False

src/xpk/commands/cluster_test.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from unittest.mock import MagicMock, patch
2121
import pytest
2222

23-
from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command
23+
from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command, install_diagon_prerequisites
2424
from xpk.core.system_characteristics import SystemCharacteristics, UserFacingNameToSystemCharacteristics
2525
from xpk.core.testing.commands_tester import CommandsTester
2626
from xpk.utils.feature_flags import FeatureFlags
@@ -87,6 +87,7 @@ def construct_args(**kwargs: Any) -> Namespace:
8787
memory_limit='100Gi',
8888
cpu_limit=100,
8989
cluster_cpu_machine_type='',
90+
managed_mldiagnostics=False,
9091
)
9192
args_dict.update(kwargs)
9293
return Namespace(**args_dict)
@@ -247,3 +248,13 @@ def test_run_gke_cluster_create_command_with_gke_version_has_no_autoupgrade_flag
247248
mocks.commands_tester.assert_command_run(
248249
'clusters create', ' --no-enable-autoupgrade'
249250
)
251+
252+
253+
def test_cluster_create_args_for_install_diagon_prerequisites(mocks: _Mocks):
  """install_diagon_prerequisites() must not trigger the common print mock."""
  install_diagon_prerequisites()

  print_call_count = mocks.common_print_mock.call_count
  assert print_call_count == 0
260+

src/xpk/parser/cluster_test.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,14 @@ def test_cluster_create_sub_slicing_can_be_set():
6464
)
6565

6666
assert args.sub_slicing is True
67+
68+
69+
def test_cluster_create_managed_mldiagnostics():
  """Passing --managed-mldiagnostics sets args.managed_mldiagnostics to True."""
  cluster_create_parser = argparse.ArgumentParser()
  set_cluster_create_parser(cluster_create_parser)

  argv = [
      "--cluster",
      "test-cluster",
      "--tpu-type",
      "v5p-8",
      "--managed-mldiagnostics",
  ]
  parsed = cluster_create_parser.parse_args(argv)

  assert parsed.managed_mldiagnostics is True

0 commit comments

Comments
 (0)