Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 13 additions & 49 deletions src/kube_galaxy/cmd/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

import typer

from kube_galaxy.pkg.utils.client import get_context, get_nodes, wait_for_nodes, wait_for_pods
from kube_galaxy.pkg.utils.errors import ClusterError
from kube_galaxy.pkg.utils.logging import error, info, print_dict, section, success, warning
from kube_galaxy.pkg.utils.shell import ShellError, run
from kube_galaxy.pkg.utils.shell import run


def status(wait: bool = False, timeout: int = 300) -> None:
Expand Down Expand Up @@ -39,21 +41,20 @@ def _print_cluster_context() -> None:

info("")
try:
result = run(["kubectl", "config", "current-context"], capture_output=True, check=False)
context = result.stdout.strip() if result.returncode == 0 else "none"
context = get_context()
info(f"Active Cluster: {context}")
except Exception:
except ClusterError:
info("Active Cluster: error checking")

try:
result = run(["kubectl", "get", "nodes"], capture_output=True, check=False)
if result.returncode == 0 and result.stdout:
lines = result.stdout.strip().split("\n")
nodes_output = get_nodes()
if nodes_output:
lines = nodes_output.strip().split("\n")
info(f"Cluster Nodes: {len(lines) - 1}")
for line in lines[1:]:
if line:
info(f" {line}")
except Exception:
except ClusterError:
pass


Expand All @@ -63,54 +64,17 @@ def _verify_cluster_health(timeout: int) -> None:
error("kubectl is required for --wait health checks", show_traceback=False)
raise typer.Exit(code=1)

timeout_arg = f"--timeout={timeout}s"
section("Cluster Health Verification")
info("Waiting for nodes to be Ready...")

try:
run(
["kubectl", "wait", "--for=condition=Ready", "node", "--all", timeout_arg],
capture_output=True,
)
run(
[
"kubectl",
"wait",
"--for=condition=Ready",
"pod",
"--all",
"-n",
"kube-system",
timeout_arg,
],
capture_output=True,
)
except ShellError as exc:
if exc.stderr.strip():
error(exc.stderr.strip(), show_traceback=False)
wait_for_nodes(timeout=timeout)
wait_for_pods(namespace="kube-system", timeout=timeout)
except ClusterError as exc:
error(str(exc), show_traceback=False)
error("Cluster readiness checks failed", show_traceback=False)
raise typer.Exit(code=1) from exc

_print_command_output(["kubectl", "cluster-info"], "Cluster Info")
_print_command_output(["kubectl", "get", "nodes", "-o", "wide"], "Nodes")
_print_command_output(["kubectl", "get", "pods", "-A", "-o", "wide"], "Pods")


def _print_command_output(command: list[str], title: str) -> None:
"""Run command and print its output with a section label."""
info("")
info(f"{title}:")
try:
result = run(command, capture_output=True)
output = result.stdout.strip()
if output:
info(output)
except ShellError as exc:
if exc.stderr.strip():
error(exc.stderr.strip(), show_traceback=False)
error(f"Failed to run: {' '.join(command)}", show_traceback=False)
raise typer.Exit(code=1) from exc


def _check_command(cmd: str) -> str:
"""Check if a command is installed and return status."""
Expand Down
24 changes: 5 additions & 19 deletions src/kube_galaxy/cmd/test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Test command handler."""

import subprocess
from pathlib import Path

import typer
Expand All @@ -9,6 +8,8 @@
from kube_galaxy.pkg.manifest.loader import load_manifest
from kube_galaxy.pkg.manifest.validator import validate_manifest
from kube_galaxy.pkg.testing.spread import collect_test_results, run_spread_tests
from kube_galaxy.pkg.utils.client import get_context, verify_connectivity
from kube_galaxy.pkg.utils.errors import ClusterError
from kube_galaxy.pkg.utils.logging import error, exception, info, section, success, warning


Expand All @@ -22,25 +23,10 @@ def spread(manifest_path: str) -> None:

try:
# Check if kubectl can connect
result = subprocess.run(
["kubectl", "cluster-info"],
capture_output=True,
text=True,
check=False,
)
if result.returncode != 0:
error("No Kubernetes cluster available. Please set up a cluster first.")
info("You can create a test cluster with: kube-galaxy setup")
raise typer.Exit(code=1)
verify_connectivity()

# Get cluster context
result = subprocess.run(
["kubectl", "config", "current-context"],
capture_output=True,
text=True,
check=True,
)
cluster_context = result.stdout.strip()
cluster_context = get_context()
success(f"Connected to cluster: {cluster_context}")

# Run spread tests from manifest
Expand All @@ -57,7 +43,7 @@ def spread(manifest_path: str) -> None:

success("Spread tests completed")

except Exception as e:
except ClusterError as e:
exception("Spread tests failed", e)
raise typer.Exit(code=1) from e

Expand Down
4 changes: 2 additions & 2 deletions src/kube_galaxy/pkg/components/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from kube_galaxy.pkg.arch.detector import ArchInfo
from kube_galaxy.pkg.literals import Commands, Permissions, SystemPaths, Timeouts
from kube_galaxy.pkg.manifest.models import ComponentConfig, InstallMethod, Manifest
from kube_galaxy.pkg.utils.client import apply_manifest
from kube_galaxy.pkg.utils.components import (
download_file,
extract_archive,
Expand Down Expand Up @@ -243,8 +244,7 @@ def bootstrap_hook(self) -> None:
raise ComponentError(
f"{comp_name} manifest not downloaded. Run download hook first."
)
run(["kubectl", "apply", "-f", str(self.manifest_path)], check=True)
info(f"Applied manifest for {comp_name}")
apply_manifest(self.manifest_path)
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bootstrap_hook() in _base.py now propagates ClusterError from apply_manifest() when the manifest installation method is CONTAINER_MANIFEST. The other component lifecycle hooks in this file (and throughout the codebase) only raise ComponentError, so this breaks the expected error contract for the hook interface. Either the apply_manifest() call should be wrapped in a try/except that catches ClusterError and re-raises it as ComponentError, or the docstring for bootstrap_hook() should be updated to document that it may also raise ClusterError.

Copilot uses AI. Check for mistakes.

pass

Expand Down
28 changes: 8 additions & 20 deletions src/kube_galaxy/pkg/components/kubeadm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@

from kube_galaxy.pkg.components import ClusterComponentBase, register_component
from kube_galaxy.pkg.literals import Commands, SystemPaths, URLs
from kube_galaxy.pkg.utils.client import (
get_api_server_status,
verify_connectivity,
wait_for_nodes,
)
from kube_galaxy.pkg.utils.errors import ComponentError
from kube_galaxy.pkg.utils.logging import info
from kube_galaxy.pkg.utils.shell import run
Expand Down Expand Up @@ -184,26 +189,9 @@ def verify_hook(self) -> None:

Checks cluster connectivity and waits for nodes/pods to be ready.
"""

# Check cluster info
run(["kubectl", "cluster-info"], check=True)

# Wait for nodes to be ready
run(
["kubectl", "wait", "--for=condition=Ready", "nodes", "--all", "--timeout=300s"],
check=True,
)

# Wait for api-server to be ready
run(
[
"kubectl",
"get",
"--raw=/readyz",
"--request-timeout=300s",
],
check=True,
)
verify_connectivity()
wait_for_nodes(timeout=300)
get_api_server_status(timeout=300)

def stop_hook(self) -> None:
"""
Expand Down
46 changes: 12 additions & 34 deletions src/kube_galaxy/pkg/testing/spread.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
task_path_for_component,
validate_component_test_structure,
)
from kube_galaxy.pkg.utils.client import create_namespace, delete_namespace, verify_connectivity
from kube_galaxy.pkg.utils.errors import ClusterError
from kube_galaxy.pkg.utils.logging import error, info, section, success, warning
from kube_galaxy.pkg.utils.shell import ShellError, run
Expand Down Expand Up @@ -111,9 +112,7 @@ def run_spread_tests(
def _verify_test_prerequisites() -> None:
"""Verify kubectl and spread are available."""
try:
info("Verifying cluster connectivity...")
run(["kubectl", "cluster-info"], check=True, capture_output=True)
success("Connected to Kubernetes cluster")
verify_connectivity()

# Check for spread
info("Verifying spread test framework...")
Expand Down Expand Up @@ -148,23 +147,15 @@ def _create_test_namespace(component_name: str) -> str:
namespace = f"kube-galaxy-test-{component_name.lower().replace('_', '-')}"

try:
info(f" Creating test namespace: {namespace}")

# Apply with labels
run(["kubectl", "create", "namespace", namespace], check=True)

# Label namespace
label = "app.kubernetes.io/managed-by=kube-galaxy"
run(
["kubectl", "label", "namespace", namespace, label, f"component={component_name}"],
check=True,
)

success(f"Namespace created: {namespace}")
labels = {
"app.kubernetes.io/managed-by": "kube-galaxy",
"component": component_name,
}
create_namespace(namespace, labels)
return namespace

except ShellError as exc:
raise ClusterError(f"Failed to create namespace {namespace}: {exc}") from exc
except ClusterError:
raise


def _cleanup_test_namespace(namespace: str, timeout: int = 60) -> None:
Expand All @@ -179,22 +170,9 @@ def _cleanup_test_namespace(namespace: str, timeout: int = 60) -> None:
ClusterError: If namespace deletion fails
"""
try:
info(f" Cleaning up namespace: {namespace}")

# Delete namespace
run(
["kubectl", "delete", "namespace", namespace, "--timeout", f"{timeout}s"],
check=True,
)

success(f"Namespace deleted: {namespace}")

except ShellError as exc:
# Don't fail if namespace doesn't exist
if "not found" in str(exc):
warning(f" Namespace {namespace} not found (may already be deleted)")
else:
raise ClusterError(f"Failed to delete namespace {namespace}: {exc}") from exc
delete_namespace(namespace, timeout)
except ClusterError:
raise


def _generate_orchestration_spread_yaml(
Expand Down
Loading
Loading