Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 28 additions & 51 deletions src/kube_galaxy/cmd/status.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
"""Status command handler."""

import shutil
from collections.abc import Callable

import typer

from kube_galaxy.pkg.utils.client import (
get_cluster_info,
get_context,
get_nodes,
get_pods,
wait_for_nodes,
wait_for_pods,
)
from kube_galaxy.pkg.utils.errors import ClusterError
from kube_galaxy.pkg.utils.logging import error, info, print_dict, section, success, warning
from kube_galaxy.pkg.utils.shell import ShellError, run
from kube_galaxy.pkg.utils.shell import run


def status(wait: bool = False, timeout: int = 300) -> None:
Expand All @@ -25,7 +35,6 @@ def _print_dependency_status() -> None:
info("Dependencies:")
deps = {
"kubectl": _check_command("kubectl"),
"kubeadm": _check_command("kubeadm"),
"spread": _check_command("spread"),
}
print_dict(deps)
Expand All @@ -39,22 +48,17 @@ def _print_cluster_context() -> None:

info("")
try:
result = run(["kubectl", "config", "current-context"], capture_output=True, check=False)
context = result.stdout.strip() if result.returncode == 0 else "none"
context = get_context()
info(f"Active Cluster: {context}")
except Exception:
info("Active Cluster: error checking")

try:
result = run(["kubectl", "get", "nodes"], capture_output=True, check=False)
if result.returncode == 0 and result.stdout:
lines = result.stdout.strip().split("\n")
nodes_output = get_nodes()
if nodes_output:
lines = nodes_output.strip().split("\n")
info(f"Cluster Nodes: {len(lines) - 1}")
for line in lines[1:]:
if line:
info(f" {line}")
except Exception:
pass
except ClusterError:
info("Active Cluster: error checking")


def _verify_cluster_health(timeout: int) -> None:
Expand All @@ -63,52 +67,31 @@ def _verify_cluster_health(timeout: int) -> None:
error("kubectl is required for --wait health checks", show_traceback=False)
raise typer.Exit(code=1)

timeout_arg = f"--timeout={timeout}s"
section("Cluster Health Verification")
info("Waiting for nodes to be Ready...")

try:
run(
["kubectl", "wait", "--for=condition=Ready", "node", "--all", timeout_arg],
capture_output=True,
)
run(
[
"kubectl",
"wait",
"--for=condition=Ready",
"pod",
"--all",
"-n",
"kube-system",
timeout_arg,
],
capture_output=True,
)
except ShellError as exc:
if exc.stderr.strip():
error(exc.stderr.strip(), show_traceback=False)
wait_for_nodes(timeout=timeout)
wait_for_pods(namespace="kube-system", timeout=timeout)
except ClusterError as exc:
error(str(exc), show_traceback=False)
error("Cluster readiness checks failed", show_traceback=False)
raise typer.Exit(code=1) from exc

_print_command_output(["kubectl", "cluster-info"], "Cluster Info")
_print_command_output(["kubectl", "get", "nodes", "-o", "wide"], "Nodes")
_print_command_output(["kubectl", "get", "pods", "-A", "-o", "wide"], "Pods")
_print_command_output(get_cluster_info, "Cluster Info")
_print_command_output(get_nodes, "Nodes")
_print_command_output(get_pods, "Pods")


def _print_command_output(command: list[str], title: str) -> None:
def _print_command_output(command: Callable[[], str], title: str) -> None:
"""Run command and print its output with a section label."""
info("")
info(f"{title}:")
try:
result = run(command, capture_output=True)
output = result.stdout.strip()
if output:
if output := command().strip():
info(output)
except ShellError as exc:
if exc.stderr.strip():
error(exc.stderr.strip(), show_traceback=False)
error(f"Failed to run: {' '.join(command)}", show_traceback=False)
except ClusterError as exc:
error(f"Failed to run: {title}", show_traceback=False)
raise typer.Exit(code=1) from exc


Expand All @@ -122,12 +105,6 @@ def _check_command(cmd: str) -> str:
capture_output=True,
check=False,
)
elif cmd == "kubeadm":
result = run(
[cmd, "version"],
capture_output=True,
check=False,
)
else:
result = run(
[cmd, "--version"],
Expand Down
24 changes: 5 additions & 19 deletions src/kube_galaxy/cmd/test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Test command handler."""

import subprocess
from pathlib import Path

import typer
Expand All @@ -9,6 +8,8 @@
from kube_galaxy.pkg.manifest.loader import load_manifest
from kube_galaxy.pkg.manifest.validator import validate_manifest
from kube_galaxy.pkg.testing.spread import collect_test_results, run_spread_tests
from kube_galaxy.pkg.utils.client import get_context, verify_connectivity
from kube_galaxy.pkg.utils.errors import ClusterError
from kube_galaxy.pkg.utils.logging import error, exception, info, section, success, warning


Expand All @@ -22,25 +23,10 @@ def spread(manifest_path: str) -> None:

try:
# Check if kubectl can connect
result = subprocess.run(
["kubectl", "cluster-info"],
capture_output=True,
text=True,
check=False,
)
if result.returncode != 0:
error("No Kubernetes cluster available. Please set up a cluster first.")
info("You can create a test cluster with: kube-galaxy setup")
raise typer.Exit(code=1)
verify_connectivity()

# Get cluster context
result = subprocess.run(
["kubectl", "config", "current-context"],
capture_output=True,
text=True,
check=True,
)
cluster_context = result.stdout.strip()
cluster_context = get_context()
success(f"Connected to cluster: {cluster_context}")

# Run spread tests from manifest
Expand All @@ -57,7 +43,7 @@ def spread(manifest_path: str) -> None:

success("Spread tests completed")

except Exception as e:
except ClusterError as e:
exception("Spread tests failed", e)
raise typer.Exit(code=1) from e

Expand Down
9 changes: 6 additions & 3 deletions src/kube_galaxy/pkg/components/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@
from kube_galaxy.pkg.arch.detector import ArchInfo
from kube_galaxy.pkg.literals import Commands, Permissions, SystemPaths, Timeouts
from kube_galaxy.pkg.manifest.models import ComponentConfig, InstallMethod, Manifest
from kube_galaxy.pkg.utils.client import apply_manifest
from kube_galaxy.pkg.utils.components import (
download_file,
extract_archive,
format_component_pattern,
install_binary,
)
from kube_galaxy.pkg.utils.errors import ComponentError
from kube_galaxy.pkg.utils.errors import ClusterError, ComponentError
from kube_galaxy.pkg.utils.logging import info
from kube_galaxy.pkg.utils.shell import run

Expand Down Expand Up @@ -243,8 +244,10 @@ def bootstrap_hook(self) -> None:
raise ComponentError(
f"{comp_name} manifest not downloaded. Run download hook first."
)
run(["kubectl", "apply", "-f", str(self.manifest_path)], check=True)
info(f"Applied manifest for {comp_name}")
try:
apply_manifest(self.manifest_path)
except ClusterError as e:
raise ComponentError(f"Failed to apply manifest for {comp_name}") from e

pass

Expand Down
28 changes: 8 additions & 20 deletions src/kube_galaxy/pkg/components/kubeadm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@

from kube_galaxy.pkg.components import ClusterComponentBase, register_component
from kube_galaxy.pkg.literals import Commands, SystemPaths, URLs
from kube_galaxy.pkg.utils.client import (
get_api_server_status,
verify_connectivity,
wait_for_nodes,
)
from kube_galaxy.pkg.utils.errors import ComponentError
from kube_galaxy.pkg.utils.logging import info
from kube_galaxy.pkg.utils.shell import run
Expand Down Expand Up @@ -184,26 +189,9 @@ def verify_hook(self) -> None:

Checks cluster connectivity and waits for nodes/pods to be ready.
"""

# Check cluster info
run(["kubectl", "cluster-info"], check=True)

# Wait for nodes to be ready
run(
["kubectl", "wait", "--for=condition=Ready", "nodes", "--all", "--timeout=300s"],
check=True,
)

# Wait for api-server to be ready
run(
[
"kubectl",
"get",
"--raw=/readyz",
"--request-timeout=300s",
],
check=True,
)
verify_connectivity()
wait_for_nodes(timeout=300)
get_api_server_status(timeout=300)

def stop_hook(self) -> None:
"""
Expand Down
67 changes: 10 additions & 57 deletions src/kube_galaxy/pkg/testing/spread.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
task_path_for_component,
validate_component_test_structure,
)
from kube_galaxy.pkg.utils.client import create_namespace, delete_namespace, verify_connectivity
from kube_galaxy.pkg.utils.errors import ClusterError
from kube_galaxy.pkg.utils.logging import error, info, section, success, warning
from kube_galaxy.pkg.utils.shell import ShellError, run
Expand Down Expand Up @@ -111,9 +112,7 @@ def run_spread_tests(
def _verify_test_prerequisites() -> None:
"""Verify kubectl and spread are available."""
try:
info("Verifying cluster connectivity...")
run(["kubectl", "cluster-info"], check=True, capture_output=True)
success("Connected to Kubernetes cluster")
verify_connectivity()

# Check for spread
info("Verifying spread test framework...")
Expand Down Expand Up @@ -147,54 +146,12 @@ def _create_test_namespace(component_name: str) -> str:
# Normalize component name for namespace (lowercase, hyphens only)
namespace = f"kube-galaxy-test-{component_name.lower().replace('_', '-')}"

try:
info(f" Creating test namespace: {namespace}")

# Apply with labels
run(["kubectl", "create", "namespace", namespace], check=True)

# Label namespace
label = "app.kubernetes.io/managed-by=kube-galaxy"
run(
["kubectl", "label", "namespace", namespace, label, f"component={component_name}"],
check=True,
)

success(f"Namespace created: {namespace}")
return namespace

except ShellError as exc:
raise ClusterError(f"Failed to create namespace {namespace}: {exc}") from exc


def _cleanup_test_namespace(namespace: str, timeout: int = 60) -> None:
"""
Delete test namespace and wait for termination.

Args:
namespace: Namespace to delete
timeout: Maximum seconds to wait for deletion

Raises:
ClusterError: If namespace deletion fails
"""
try:
info(f" Cleaning up namespace: {namespace}")

# Delete namespace
run(
["kubectl", "delete", "namespace", namespace, "--timeout", f"{timeout}s"],
check=True,
)

success(f"Namespace deleted: {namespace}")

except ShellError as exc:
# Don't fail if namespace doesn't exist
if "not found" in str(exc):
warning(f" Namespace {namespace} not found (may already be deleted)")
else:
raise ClusterError(f"Failed to delete namespace {namespace}: {exc}") from exc
labels = {
"app.kubernetes.io/managed-by": "kube-galaxy",
"component": component_name,
}
create_namespace(namespace, labels)
return namespace


def _generate_orchestration_spread_yaml(
Expand Down Expand Up @@ -350,11 +307,7 @@ def _run_component_tests(
raise ClusterError("Component validation failed")

# Generate orchestration spread.yaml
try:
component_suites = _generate_orchestration_spread_yaml(spread_components, kubeconfig)
except ClusterError as exc:
error(f"Failed to generate orchestration spread.yaml: {exc}")
raise
component_suites = _generate_orchestration_spread_yaml(spread_components, kubeconfig)

# Track test results
test_results = []
Expand Down Expand Up @@ -412,7 +365,7 @@ def _run_component_tests(
# Step 4: Cleanup namespace (always executed)
if namespace:
try:
_cleanup_test_namespace(namespace)
delete_namespace(namespace)
except Exception as cleanup_exc:
warning(f" Namespace cleanup failed: {cleanup_exc}")

Expand Down
Loading
Loading