Skip to content

Commit 0b052aa

Browse files
committed
update goldens.yaml
1 parent 0f90f8e commit 0b052aa

File tree

3 files changed

+47 additions, -69 deletions (116 lines changed)

goldens.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
goldens:
22
"NAP cluster-create with pathways":
3-
command: xpk cluster create-pathways --project=golden-project --zone=us-central1-a --enable-autoprovisioning --cluster=golden-cluster --tpu-type=tpu7x-8 --on-demand --dry-run
3+
command: xpk cluster create-pathways --project=golden-project --zone=us-central1-a --enable-autoprovisioning --cluster=golden-cluster --tpu-type=tpu7x-8 --on-demand --dry-run --managed-mldiagnostics
44
"NAP cluster-create":
55
command: xpk cluster create --project=golden-project --zone=us-central1-a --enable-autoprovisioning --cluster=golden-cluster --tpu-type=tpu7x-8 --on-demand --dry-run
66
"Basic cluster create":

goldens/NAP_cluster-create_with_pathways.txt

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -219,17 +219,4 @@ kubectl apply -f a63aa3c4593c38ad90671fd8b067d1886f6313ad558379b364b51791aa50f4e
219219
kubectl apply -f 1d13ddebae3c90a05ba26b312df088982dd0df0edc4f4013b88384e476c20486
220220
[XPK] GKE commands done! Resources are created.
221221
[XPK] See your GKE Cluster here: https://console.cloud.google.com/kubernetes/clusters/details/us-central1/golden-cluster/details?project=golden-project
222-
Traceback (most recent call last):
223-
File "/usr/local/google/home/lidanny/Desktop/Project/diagon_xpk/cienet_xpk/xpk/xpk.py", line 39, in <module>
224-
main()
225-
~~~~^^
226-
File "/usr/local/google/home/lidanny/Desktop/Project/diagon_xpk/cienet_xpk/xpk/src/xpk/main.py", line 77, in main
227-
main_args.func(main_args)
228-
~~~~~~~~~~~~~~^^^^^^^^^^^
229-
File "/usr/local/google/home/lidanny/Desktop/Project/diagon_xpk/cienet_xpk/xpk/src/xpk/commands/cluster.py", line 765, in cluster_create_pathways
230-
cluster_create(args)
231-
~~~~~~~~~~~~~~^^^^^^
232-
File "/usr/local/google/home/lidanny/Desktop/Project/diagon_xpk/cienet_xpk/xpk/src/xpk/commands/cluster.py", line 411, in cluster_create
233-
if args.managed_mldiagnostics:
234-
^^^^^^^^^^^^^^^^^^^^^^^^^^
235-
AttributeError: 'Namespace' object has no attribute 'managed_mldiagnostics'
222+
[XPK] Exiting XPK cleanly

src/xpk/commands/cluster.py

Lines changed: 45 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,7 +1330,7 @@ def install_cert_manager(version: str = 'v1.13.0'):
13301330
Returns:
13311331
0 if successful and 1 otherwise.
13321332
"""
1333-
1333+
13341334
command = (
13351335
f'kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/'
13361336
f'{version}/cert-manager.yaml'
@@ -1358,19 +1358,19 @@ def download_mldiagnostics_yaml(package_name: str, version: str):
13581358
f'--package={package_name} --version={version} --destination=./ '
13591359
f'--project=ai-on-gke'
13601360
)
1361-
1361+
13621362
return_code, return_output = run_command_for_value(
13631363
command, f'Starting gcloud artifacts download for {package_name} {version}...'
13641364
)
1365-
1365+
13661366
if return_code != 0:
13671367
if 'already exists' in return_output:
13681368
xpk_print(f'Artifact file for {package_name} {version} already exists locally. Skipping download.')
13691369
return 0
13701370
xpk_print(f'gcloud download returned with ERROR {return_code}.\n')
13711371
xpk_exit(return_code)
13721372

1373-
xpk_print(f'Artifact download completed successfully.')
1373+
xpk_print('Artifact download completed successfully.')
13741374
return return_code
13751375

13761376
def create_mldiagnostics_namespace():
@@ -1380,21 +1380,21 @@ def create_mldiagnostics_namespace():
13801380
Returns:
13811381
0 if successful and 1 otherwise.
13821382
"""
1383-
1384-
command = f'kubectl create namespace gke-diagon'
1385-
1383+
1384+
command = 'kubectl create namespace gke-diagon'
1385+
13861386
return_code, return_output = run_command_for_value(
1387-
command, f'Starting kubectl create namespace...'
1387+
command, 'Starting kubectl create namespace...'
13881388
)
1389-
1389+
13901390
if return_code != 0:
13911391
if 'already exists' in return_output:
13921392
xpk_print('Namespace already exists locally. Skipping creation.')
13931393
return 0
13941394
xpk_print(f'Namespace creation returned with ERROR {return_code}.\n')
13951395
xpk_exit(return_code)
13961396

1397-
xpk_print(f'gke-diagon Namespace created or already exists.')
1397+
xpk_print('gke-diagon Namespace created or already exists.')
13981398
return return_code
13991399

14001400
def install_mldiagnostics_yaml(artifact_filename: str):
@@ -1404,17 +1404,17 @@ def install_mldiagnostics_yaml(artifact_filename: str):
14041404
Returns:
14051405
0 if successful and 1 otherwise.
14061406
"""
1407-
1407+
14081408
command = f'kubectl apply -f {artifact_filename} -n gke-diagon'
1409-
1409+
14101410
return_code = run_command_with_updates(
14111411
command, f'Starting kubectl apply -f {artifact_filename} -n gke-diagon...'
14121412
)
1413-
1413+
14141414
if return_code != 0:
14151415
xpk_print(f'kubectl apply returned with ERROR {return_code}.\n')
14161416
xpk_exit(return_code)
1417-
1417+
14181418
xpk_print(f'{artifact_filename} applied successfully.')
14191419

14201420
if os.path.exists(artifact_filename):
@@ -1424,13 +1424,10 @@ def install_mldiagnostics_yaml(artifact_filename: str):
14241424

14251425
except PermissionError:
14261426
xpk_print(f'Failed to delete file {artifact_filename} due to Permission Error.')
1427-
1428-
except Exception as e:
1429-
xpk_print(f'Failed to delete file {artifact_filename}. Unexpected error: {e}')
14301427

14311428
else:
14321429
xpk_print(f'File {artifact_filename} does not exist locally. Skipping deletion (Cleanup assumed).')
1433-
1430+
14341431
return return_code
14351432

14361433
def label_default_namespace_mldiagnostics():
@@ -1440,13 +1437,13 @@ def label_default_namespace_mldiagnostics():
14401437
Returns:
14411438
0 if successful and 1 otherwise.
14421439
"""
1443-
1444-
command = f'kubectl label namespace default diagon-enabled=true'
1445-
1440+
1441+
command = 'kubectl label namespace default diagon-enabled=true'
1442+
14461443
return_code = run_command_with_updates(
1447-
command, f"Starting kubectl label namespace default with diagon-enabled=true..."
1444+
command, 'Starting kubectl label namespace default with diagon-enabled=true...'
14481445
)
1449-
1446+
14501447
if return_code != 0:
14511448
xpk_print(f'Namespace labeling returned with ERROR {return_code}.\n')
14521449
xpk_exit(return_code)
@@ -1475,46 +1472,46 @@ def install_diagon_prerequisites():
14751472
cert_webhook_ready = wait_for_deployment_ready(cert_webhook_deployment_name, cert_webhook_namespace_name)
14761473
if cert_webhook_ready:
14771474

1478-
webhook_package = "mldiagnostics-injection-webhook"
1479-
webhook_version = "v0.3.0"
1480-
webhook_filename = f"{webhook_package}-{webhook_version}.yaml"
1475+
webhook_package = 'mldiagnostics-injection-webhook'
1476+
webhook_version = 'v0.3.0'
1477+
webhook_filename = f'{webhook_package}-{webhook_version}.yaml'
14811478

14821479
return_code = download_mldiagnostics_yaml(package_name=webhook_package, version=webhook_version)
14831480
if return_code != 0:
14841481
return return_code
1485-
1482+
14861483
return_code = create_mldiagnostics_namespace()
14871484
if return_code != 0:
14881485
return return_code
14891486

14901487
return_code = install_mldiagnostics_yaml(artifact_filename=webhook_filename)
14911488
if return_code != 0:
14921489
return return_code
1493-
1490+
14941491
return_code = label_default_namespace_mldiagnostics()
14951492
if return_code != 0:
14961493
return return_code
14971494

14981495
# --- Install Operator ---
1499-
operator_package = "mldiagnostics-connection-operator"
1500-
operator_version = "v0.3.0"
1501-
operator_filename = f"{operator_package}-{operator_version}.yaml"
1502-
1496+
operator_package = 'mldiagnostics-connection-operator'
1497+
operator_version = 'v0.3.0'
1498+
operator_filename = f'{operator_package}-{operator_version}.yaml'
1499+
15031500
return_code = download_mldiagnostics_yaml(package_name=operator_package, version=operator_version)
15041501
if return_code != 0:
15051502
return return_code
1506-
1503+
15071504
return_code = install_mldiagnostics_yaml(artifact_filename=operator_filename)
15081505
if return_code != 0:
15091506
return return_code
1510-
1511-
xpk_print("All diagon installation and setup steps have been successfully completed!")
1507+
1508+
xpk_print('All diagon installation and setup steps have been successfully completed!')
15121509
return return_code
15131510
else:
1514-
xpk_print("The cert-manager-webhook installation failed.")
1511+
xpk_print('The cert-manager-webhook installation failed.')
15151512
xpk_exit(1)
15161513
else:
1517-
xpk_print(f"Application {deployment_name} failed to become ready within the timeout.")
1514+
xpk_print(f'Application {deployment_name} failed to become ready within the timeout.')
15181515
xpk_exit(1)
15191516

15201517
def wait_for_deployment_ready(deployment_name: str, namespace: str, timeout_seconds: int = 300) -> bool:
@@ -1530,28 +1527,22 @@ def wait_for_deployment_ready(deployment_name: str, namespace: str, timeout_seco
15301527
Returns:
15311528
bool: True if the Deployment successfully rolled out, False otherwise (timeout or error).
15321529
"""
1533-
1530+
15341531
command = (
15351532
f'kubectl rollout status deployment/{deployment_name} -n {namespace}'
15361533
f' --timeout={timeout_seconds}s'
15371534
)
1538-
1539-
print(f"Waiting for deployment {deployment_name} in namespace {namespace} to successfully roll out...")
15401535

1541-
try:
1542-
return_code, return_output = run_command_for_value(
1543-
command, f'Checking status of deployment {deployment_name}...'
1544-
)
1536+
print(f'Waiting for deployment {deployment_name} in namespace {namespace} to successfully roll out...')
15451537

1546-
if return_code != 0:
1547-
xpk_print(f"\nError: Deployment {deployment_name} failed to roll out.")
1548-
xpk_print(f"kubectl output: {return_output}")
1549-
return False
1538+
return_code, return_output = run_command_for_value(
1539+
command, f'Checking status of deployment {deployment_name}...'
1540+
)
15501541

1551-
xpk_print(f"Success: Deployment {deployment_name} successfully rolled out.")
1552-
return True
1553-
1554-
except Exception as e:
1555-
xpk_print(f"\nUnexpected API request error while checking deployment status: {e}")
1556-
time.sleep(10)
1542+
if return_code != 0:
1543+
xpk_print(f'\nError: Deployment {deployment_name} failed to roll out.')
1544+
xpk_print(f'kubectl output: {return_output}')
15571545
return False
1546+
1547+
xpk_print(f'Success: Deployment {deployment_name} successfully rolled out.')
1548+
return True

0 commit comments

Comments
 (0)