diff --git a/.github/actions/install-chart/action.yaml b/.github/actions/install-chart/action.yaml new file mode 100644 index 0000000..a4b4967 --- /dev/null +++ b/.github/actions/install-chart/action.yaml @@ -0,0 +1,81 @@ +name: Install Helm Chart +description: Install the Confidential Containers Helm chart with various configurations + +inputs: + release-name: + description: 'Helm release name' + required: false + default: 'coco' + namespace: + description: 'Kubernetes namespace' + required: false + default: 'kube-system' + extra-args: + description: 'Extra Helm install arguments (e.g., --set flags)' + required: false + default: '' + values-file: + description: 'Path to values file (optional)' + required: false + default: '' + wait-timeout: + description: 'Timeout for helm install --wait' + required: false + default: '15m' + +outputs: + installed: + description: 'Whether installation succeeded' + value: ${{ steps.install.outputs.result }} + +runs: + using: composite + steps: + - name: Update Helm dependencies + shell: bash + run: | + echo "๐Ÿ“ฆ Updating Helm dependencies..." + helm dependency update + echo "โœ… Dependencies updated" + + - name: Validate chart + shell: bash + run: | + echo "๐Ÿ” Validating chart..." + helm lint . + echo "โœ… Chart is valid" + + - name: Install chart + id: install + shell: bash + run: | + echo "๐Ÿš€ Installing chart: ${{ inputs.release-name }}" + echo " Namespace: ${{ inputs.namespace }}" + echo " Extra args: ${{ inputs.extra-args }}" + if [ -n "${{ inputs.values-file }}" ]; then + echo " Values file: ${{ inputs.values-file }}" + fi + + INSTALL_CMD="helm install ${{ inputs.release-name }} . 
\ + --namespace ${{ inputs.namespace }} \ + --create-namespace \ + --debug" + + if [ -n "${{ inputs.values-file }}" ]; then + INSTALL_CMD="$INSTALL_CMD -f ${{ inputs.values-file }}" + fi + + if [ -n "${{ inputs.extra-args }}" ]; then + INSTALL_CMD="$INSTALL_CMD ${{ inputs.extra-args }}" + fi + + echo "Running: $INSTALL_CMD" + + if eval $INSTALL_CMD; then + echo "โœ… Chart installed successfully" + echo "result=success" >> $GITHUB_OUTPUT + else + echo "โŒ Chart installation failed" + echo "result=failed" >> $GITHUB_OUTPUT + exit 1 + fi diff --git a/.github/actions/run-test-pod/action.yaml b/.github/actions/run-test-pod/action.yaml new file mode 100644 index 0000000..dfdb6ad --- /dev/null +++ b/.github/actions/run-test-pod/action.yaml @@ -0,0 +1,240 @@ +name: Run Test Pod +description: Deploy and verify a test pod using Kata runtime + +inputs: + runtime-class: + description: 'RuntimeClass to use for the test pod' + required: true + namespace: + description: 'Kubernetes namespace for test pod' + required: false + default: 'default' + pod-name: + description: 'Name of the test pod' + required: false + default: 'kata-test-pod' + timeout: + description: 'Timeout for pod to become ready' + required: false + default: '5m' + +outputs: + pod-status: + description: 'Final status of the test pod' + value: ${{ steps.check-status.outputs.status }} + +runs: + using: composite + steps: + - name: Verify cluster health + shell: bash + run: | + set -e # Exit on any error + echo "๐Ÿฅ Checking cluster health before running test pod..." + + # Retry kubectl cluster-info with exponential backoff + MAX_RETRIES=5 + RETRY_DELAY=2 + + for attempt in $(seq 1 $MAX_RETRIES); do + echo " Attempt $attempt/$MAX_RETRIES: Checking API server..." 
+ + if kubectl cluster-info 2>&1 | grep -q "is running"; then + echo "โœ… Kubernetes API server is responding" + break + else + if [ $attempt -eq $MAX_RETRIES ]; then + echo "โŒ Kubernetes API server is not responding after $MAX_RETRIES attempts" + echo "" + echo "Cluster info:" + kubectl cluster-info dump --output-directory=/tmp/cluster-info --namespaces=kube-system 2>&1 || true + echo "" + echo "System pods:" + kubectl get pods -n kube-system 2>&1 || echo "Failed to get pods" + echo "" + echo "Nodes:" + kubectl get nodes 2>&1 || echo "Failed to get nodes" + exit 1 + fi + echo " API server not ready, waiting ${RETRY_DELAY}s..." + sleep $RETRY_DELAY + RETRY_DELAY=$((RETRY_DELAY * 2)) + fi + done + + # Check nodes are ready + echo "" + echo "๐Ÿ“‹ Node status:" + kubectl get nodes + + NOT_READY_NODES=$(kubectl get nodes --no-headers 2>/dev/null | grep -v " Ready " | wc -l || echo "0") + NOT_READY_NODES=$(echo "$NOT_READY_NODES" | tr -d ' \n') # Remove spaces and newlines + if [ "$NOT_READY_NODES" -gt 0 ] 2>/dev/null || [ "$NOT_READY_NODES" = "" ]; then + echo "โš ๏ธ Warning: Some nodes are not ready" + kubectl get nodes + fi + + echo "" + echo "๐Ÿ“‹ System pods status:" + kubectl get pods -n kube-system + + echo "โœ… Cluster health check passed" + + - name: Create test pod + shell: bash + run: | + set -e # Exit on any error + echo "๐Ÿš€ Creating test pod with RuntimeClass: ${{ inputs.runtime-class }}" + + cat > /tmp/test-pod.yaml <&1; then + echo "โœ… Test pod created successfully" + break + else + if [ $attempt -eq $MAX_RETRIES ]; then + echo "โŒ Failed to create test pod after $MAX_RETRIES attempts" + echo "" + echo "Checking API server:" + kubectl cluster-info || true + exit 1 + fi + echo " Failed to create pod, waiting ${RETRY_DELAY}s before retry..." 
+ sleep $RETRY_DELAY + RETRY_DELAY=$((RETRY_DELAY * 2)) + fi + done + + - name: Wait for pod + shell: bash + run: | + set -e # Exit on any error + echo "โณ Waiting for pod to start (timeout: ${{ inputs.timeout }})..." + + # Wait for pod to be scheduled + SUCCESS=false + for i in {1..30}; do + POD_PHASE=$(kubectl get pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound") + echo " Attempt $i/30: Pod phase is: $POD_PHASE" + + if [ "$POD_PHASE" = "Running" ] || [ "$POD_PHASE" = "Succeeded" ]; then + echo "โœ… Pod is in $POD_PHASE state" + SUCCESS=true + break + elif [ "$POD_PHASE" = "Failed" ]; then + echo "โŒ Pod failed" + kubectl describe pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} + exit 1 + fi + + sleep 10 + done + + if [ "$SUCCESS" = "false" ]; then + echo "โŒ Timeout: Pod did not reach Running/Succeeded state" + kubectl describe pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} + exit 1 + fi + + - name: Check pod status + id: check-status + shell: bash + run: | + set -e # Exit on any error + echo "๐Ÿ” Checking final pod status..." 
+ + kubectl get pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} + + POD_PHASE=$(kubectl get pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} -o jsonpath='{.status.phase}') + + echo "" + echo "Pod phase: $POD_PHASE" + + if [ "$POD_PHASE" = "Running" ] || [ "$POD_PHASE" = "Succeeded" ]; then + echo "โœ… Pod reached $POD_PHASE state successfully" + echo "status=success" >> $GITHUB_OUTPUT + else + echo "โŒ Pod did not reach Running/Succeeded state (current: $POD_PHASE)" + echo "status=failed" >> $GITHUB_OUTPUT + kubectl describe pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} + exit 1 + fi + + - name: Show pod details + shell: bash + run: | + echo "๐Ÿ“‹ Pod details:" + kubectl describe pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} + + - name: Show pod logs + shell: bash + run: | + echo "๐Ÿ“‹ Pod logs:" + kubectl logs ${{ inputs.pod-name }} -n ${{ inputs.namespace }} || echo "No logs available yet" + + - name: Verify Kata runtime is used + shell: bash + run: | + set -e # Exit on any error + echo "๐Ÿ” Verifying Kata runtime is actually being used..." 
+ + # Get the node the pod is running on + NODE=$(kubectl get pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} -o jsonpath='{.spec.nodeName}') + if [ -z "$NODE" ]; then + echo "โŒ Failed to get node name for pod" + exit 1 + fi + echo "Pod is running on node: $NODE" + + # In kind, we can check the container runtime via docker + if command -v docker &> /dev/null; then + echo "" + echo "Container processes on the node:" + docker exec ${NODE} ps aux | grep -E "containerd|qemu" | head -10 || true + fi + + # Check RuntimeClass in pod spec + RUNTIME_CLASS=$(kubectl get pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} -o jsonpath='{.spec.runtimeClassName}') + echo "" + echo "Pod RuntimeClass: $RUNTIME_CLASS" + echo "Expected RuntimeClass: ${{ inputs.runtime-class }}" + + if [ "$RUNTIME_CLASS" = "${{ inputs.runtime-class }}" ]; then + echo "โœ… Pod is using the correct RuntimeClass" + else + echo "โŒ Pod RuntimeClass mismatch!" + echo " Expected: ${{ inputs.runtime-class }}" + echo " Got: $RUNTIME_CLASS" + kubectl describe pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} + exit 1 + fi + + - name: Cleanup test pod + if: always() + shell: bash + run: | + echo "๐Ÿ—‘๏ธ Cleaning up test pod..." + kubectl delete pod ${{ inputs.pod-name }} -n ${{ inputs.namespace }} --ignore-not-found=true + echo "โœ… Test pod cleaned up" diff --git a/.github/actions/setup-k8s-k0s/action.yaml b/.github/actions/setup-k8s-k0s/action.yaml new file mode 100644 index 0000000..104edb4 --- /dev/null +++ b/.github/actions/setup-k8s-k0s/action.yaml @@ -0,0 +1,100 @@ +name: 'Setup K0s Kubernetes' +description: 'Install K0s Kubernetes distribution' +inputs: + extra-params: + description: 'Extra parameters to pass to k0s install' + required: false + default: '' +runs: + using: 'composite' + steps: + - name: Free up disk space + shell: bash + run: | + echo "๐Ÿงน Removing unnecessary directories to free up disk space..." 
+ sudo rm -rf /usr/local/.ghcup + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/lib/jvm + sudo rm -rf /usr/share/swift + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/julia* + sudo rm -rf /opt/az + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /opt/microsoft + sudo rm -rf /opt/google + sudo rm -rf /usr/lib/firefox + echo "โœ… Disk space freed up" + df -h / | grep -v Filesystem + + - name: Install K0s + shell: bash + run: | + echo "๐Ÿ“ฆ Installing K0s..." + curl -sSLf https://get.k0s.sh | sudo sh + + # Install k0s controller + sudo k0s install controller --single ${{ inputs.extra-params }} + + # kube-router uses :8080 for metrics, which causes issues in k0s 1.30.0+ + # Change metricsPort to :9999 to avoid conflicts + sudo mkdir -p /etc/k0s + k0s config create | sudo tee /etc/k0s/k0s.yaml + sudo sed -i -e "s/metricsPort: 8080/metricsPort: 9999/g" /etc/k0s/k0s.yaml + + sudo k0s start + + echo "โณ Waiting for K0s to be ready..." + sleep 120 + + - name: Setup kubectl + shell: bash + run: | + echo "๐Ÿ”ง Setting up kubectl..." + + # Download the kubectl binary into /usr/bin + ARCH=$(uname -m) + case "${ARCH}" in + x86_64) ARCH="amd64" ;; + aarch64) ARCH="arm64" ;; + esac + + kubectl_version=$(sudo k0s kubectl version 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/"${kubectl_version}"/bin/linux/"${ARCH}"/kubectl + sudo chmod +x /usr/bin/kubectl + + mkdir -p ~/.kube + sudo cp /var/lib/k0s/pki/admin.conf ~/.kube/config + sudo chown "${USER}":"${USER}" ~/.kube/config + + echo "โœ… kubectl installed: $(kubectl version --client)" + + - name: Verify cluster + shell: bash + run: | + echo "๐Ÿ” Verifying K0s cluster..." 
+ kubectl get nodes + kubectl get pods -A + + # Wait for system pods to be ready (excluding completed/job pods) + echo "โณ Waiting for system pods..." + # Wait only for Running pods (not Completed/Job pods) + kubectl wait --for=condition=Ready pods --all -n kube-system --timeout=5m \ + --field-selector=status.phase!=Succeeded,status.phase!=Failed || true + + # Verify all pods are either Running or Completed + NOT_READY=$(kubectl get pods -n kube-system -o json | \ + jq -r '.items[] | select(.status.phase != "Running" and .status.phase != "Succeeded") | .metadata.name') + + if [ -n "$NOT_READY" ]; then + echo "โŒ Some pods are not ready:" + echo "$NOT_READY" + kubectl get pods -A + exit 1 + fi + + echo "โœ… K0s cluster is ready!" diff --git a/.github/actions/setup-k8s-k3s/action.yaml b/.github/actions/setup-k8s-k3s/action.yaml new file mode 100644 index 0000000..135be93 --- /dev/null +++ b/.github/actions/setup-k8s-k3s/action.yaml @@ -0,0 +1,94 @@ +name: 'Setup K3s Kubernetes' +description: 'Install K3s Kubernetes distribution' +inputs: + extra-params: + description: 'Extra parameters to pass to k3s installer' + required: false + default: '' +runs: + using: 'composite' + steps: + - name: Free up disk space + shell: bash + run: | + echo "๐Ÿงน Removing unnecessary directories to free up disk space..." + sudo rm -rf /usr/local/.ghcup + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/lib/jvm + sudo rm -rf /usr/share/swift + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/julia* + sudo rm -rf /opt/az + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /opt/microsoft + sudo rm -rf /opt/google + sudo rm -rf /usr/lib/firefox + echo "โœ… Disk space freed up" + df -h / | grep -v Filesystem + + - name: Install K3s + shell: bash + run: | + echo "๐Ÿ“ฆ Installing K3s..." 
+ curl -sfL https://get.k3s.io | sh -s - --write-kubeconfig-mode 644 ${{ inputs.extra-params }} + + echo "โณ Waiting for K3s to be ready..." + sleep 120 + + - name: Setup kubectl + shell: bash + run: | + echo "๐Ÿ”ง Setting up kubectl..." + + # Download the kubectl binary into /usr/bin and remove /usr/local/bin/kubectl + # We need to do this to avoid hitting issues like: + # error: open /etc/rancher/k3s/k3s.yaml.lock: permission denied + # Which happens because k3s links `/usr/local/bin/kubectl` to `/usr/local/bin/k3s`, + # and that does extra stuff that vanilla `kubectl` doesn't do. + + ARCH=$(uname -m) + case "${ARCH}" in + x86_64) ARCH="amd64" ;; + aarch64) ARCH="arm64" ;; + esac + + kubectl_version=$(/usr/local/bin/k3s kubectl version --client=true 2>/dev/null | grep "Client Version" | sed -e 's/Client Version: //' -e 's/+k3s[0-9]\+//') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/"${kubectl_version}"/bin/linux/"${ARCH}"/kubectl + sudo chmod +x /usr/bin/kubectl + sudo rm -rf /usr/local/bin/kubectl + + mkdir -p ~/.kube + cp /etc/rancher/k3s/k3s.yaml ~/.kube/config + + echo "โœ… kubectl installed: $(kubectl version --client)" + + - name: Verify cluster + shell: bash + run: | + echo "๐Ÿ” Verifying K3s cluster..." + kubectl get nodes + kubectl get pods -A + + # Wait for system pods to be ready (excluding completed/job pods) + echo "โณ Waiting for system pods..." 
+ # Wait only for Running pods (not Completed/Job pods) + kubectl wait --for=condition=Ready pods --all -n kube-system --timeout=5m \ + --field-selector=status.phase!=Succeeded,status.phase!=Failed || true + + # Verify all pods are either Running or Completed + NOT_READY=$(kubectl get pods -n kube-system -o json | \ + jq -r '.items[] | select(.status.phase != "Running" and .status.phase != "Succeeded") | .metadata.name') + + if [ -n "$NOT_READY" ]; then + echo "โŒ Some pods are not ready:" + echo "$NOT_READY" + kubectl get pods -A + exit 1 + fi + + echo "โœ… K3s cluster is ready!" diff --git a/.github/actions/setup-k8s-kubeadm/action.yaml b/.github/actions/setup-k8s-kubeadm/action.yaml new file mode 100644 index 0000000..a1a6025 --- /dev/null +++ b/.github/actions/setup-k8s-kubeadm/action.yaml @@ -0,0 +1,373 @@ +name: 'Setup Vanilla Kubernetes (kubeadm)' +description: 'Install Kubernetes using kubeadm with containerd or cri-o' +inputs: + container-runtime: + description: 'Container runtime to use (containerd or crio)' + required: false + default: 'containerd' + runtime-version: + description: 'Container runtime version (major.minor or "latest" for newest release including beta)' + required: false + default: 'latest' +runs: + using: 'composite' + steps: + - name: Free up disk space + shell: bash + run: | + echo "๐Ÿงน Removing unnecessary directories to free up disk space..." 
+ sudo rm -rf /usr/local/.ghcup + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/lib/jvm + sudo rm -rf /usr/share/swift + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/julia* + sudo rm -rf /opt/az + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /opt/microsoft + sudo rm -rf /opt/google + sudo rm -rf /usr/lib/firefox + echo "โœ… Disk space freed up" + df -h / | grep -v Filesystem + + - name: Prepare system + shell: bash + run: | + echo "๐Ÿ”ง Preparing system for Kubernetes..." + + # Install system dependencies + sudo apt-get update + sudo apt-get -y install runc + + # Load required kernel modules + sudo modprobe overlay + sudo modprobe br_netfilter + + # Set network parameters + sudo sysctl -w net.bridge.bridge-nf-call-iptables=1 + sudo sysctl -w net.ipv4.ip_forward=1 + sudo sysctl -w net.bridge.bridge-nf-call-ip6tables=1 + + # Disable swap + sudo swapoff -a + + - name: Install container runtime + shell: bash + run: | + echo "๐Ÿ“ฆ Installing ${{ inputs.container-runtime }} ${{ inputs.runtime-version }}..." + + RUNTIME="${{ inputs.container-runtime }}" + VERSION="${{ inputs.runtime-version }}" + + if [ "${RUNTIME}" = "containerd" ]; then + # Install containerd + ARCH=$(uname -m) + case "${ARCH}" in + x86_64) ARCH="amd64" ;; + aarch64) ARCH="arm64" ;; + esac + + # Setup GitHub API authentication if available + CURL_AUTH_HEADER="" + if [ -n "${GH_TOKEN:-}" ]; then + CURL_AUTH_HEADER="Authorization: Bearer ${GH_TOKEN}" + fi + + # Determine full version + if [ "${VERSION}" = "latest" ]; then + # Get the absolute latest release (including beta/RC) + # Use /releases (not /releases/latest) to get the very latest including pre-releases + # Note: Filter out API releases (tag_name contains "api/") + echo "๐Ÿ” Fetching latest containerd release from GitHub API..." 
+ if [ -n "${CURL_AUTH_HEADER}" ]; then + API_RESPONSE=$(curl -sSf -H "${CURL_AUTH_HEADER}" https://api.github.com/repos/containerd/containerd/releases) + else + API_RESPONSE=$(curl -sSf https://api.github.com/repos/containerd/containerd/releases) + fi + + if [ -z "${API_RESPONSE}" ]; then + echo "โŒ Failed to fetch containerd releases from GitHub API" + exit 1 + fi + + # Filter out API releases (those with "api/" in tag_name) and get the first binary release + FULL_VERSION=$(echo "${API_RESPONSE}" | jq -r '[.[] | select(.tag_name | contains("api/") | not)][0].tag_name // "ERROR"') + + if [ "${FULL_VERSION}" = "ERROR" ] || [ -z "${FULL_VERSION}" ]; then + echo "โŒ Failed to find a valid containerd binary release" + echo "๐Ÿ” Available releases (first 5):" + echo "${API_RESPONSE}" | jq -r '.[0:5][] | {tag_name, name, prerelease}' + exit 1 + fi + + # Remove 'v' prefix + FULL_VERSION=$(echo "${FULL_VERSION}" | sed 's/^v//') + echo "โ„น๏ธ Using latest containerd release: v${FULL_VERSION}" + else + # Get latest patch version for the specified major.minor + # Note: Filter out API releases (tag_name contains "api/") + echo "๐Ÿ” Fetching containerd ${VERSION}.x releases from GitHub API..." + if [ -n "${CURL_AUTH_HEADER}" ]; then + API_RESPONSE=$(curl -sSf -H "${CURL_AUTH_HEADER}" https://api.github.com/repos/containerd/containerd/releases) + else + API_RESPONSE=$(curl -sSf https://api.github.com/repos/containerd/containerd/releases) + fi + + if [ -z "${API_RESPONSE}" ]; then + echo "โŒ Failed to fetch containerd releases from GitHub API" + exit 1 + fi + + # Filter out API releases and find matching version + FULL_VERSION=$(echo "${API_RESPONSE}" | jq -r '[.[] | select(.tag_name | contains("api/") | not)] | .[].tag_name' | grep "^v${VERSION}\." 
| head -1 | sed 's/^v//') + + if [ -z "${FULL_VERSION}" ]; then + echo "โŒ No containerd ${VERSION}.x release found" + echo "๐Ÿ” Available ${VERSION}.x releases:" + echo "${API_RESPONSE}" | jq -r '[.[] | select(.tag_name | contains("api/") | not) | select(.tag_name | startswith("v'${VERSION}'."))] | .[0:5][] | .tag_name' + exit 1 + fi + + echo "โ„น๏ธ Using latest ${VERSION}.x release: v${FULL_VERSION}" + fi + + if [ -z "${FULL_VERSION}" ]; then + echo "โŒ Failed to find containerd version matching ${VERSION}" + exit 1 + fi + + echo "๐Ÿ“ฅ Downloading containerd ${FULL_VERSION}..." + curl -fsSL -o containerd.tar.gz \ + "https://github.com/containerd/containerd/releases/download/v${FULL_VERSION}/containerd-${FULL_VERSION}-linux-${ARCH}.tar.gz" + + sudo tar -C /usr/local -xzf containerd.tar.gz + rm containerd.tar.gz + + # Install containerd systemd service + sudo curl -fsSL -o /etc/systemd/system/containerd.service \ + https://raw.githubusercontent.com/containerd/containerd/main/containerd.service + + # Configure containerd + sudo mkdir -p /etc/containerd + containerd config default | sed -e 's/SystemdCgroup = false/SystemdCgroup = true/' | sudo tee /etc/containerd/config.toml + + elif [ "${RUNTIME}" = "crio" ]; then + # Install CRI-O from package repository + echo "๐Ÿ“ฅ Installing CRI-O (version aligned with Kubernetes)..." + + # Get the Kubernetes version that will be installed (same as kubeadm) + KUBERNETES_VERSION=$(curl -Ls https://dl.k8s.io/release/stable.txt | cut -d. 
-f-2) + + if [ -z "${KUBERNETES_VERSION}" ]; then + echo "โŒ Failed to determine Kubernetes version" + exit 1 + fi + + # Use CRI-O version matching the Kubernetes major.minor version + # K8s v1.31 -> CRI-O v1.31 (keep the 'v' prefix for opensuse repos) + CRIO_VERSION="${KUBERNETES_VERSION}" + + echo "โ„น๏ธ Kubernetes version: ${KUBERNETES_VERSION}" + echo "โ„น๏ธ Checking for CRI-O version: ${CRIO_VERSION} (aligned with K8s)" + + # Check if CRI-O repository exists for this version + CRIO_REPO_URL="https://download.opensuse.org/repositories/isv:/cri-o:/stable:/${CRIO_VERSION}/deb/Release.key" + if ! curl -fsSL --head "${CRIO_REPO_URL}" >/dev/null 2>&1; then + echo "โš ๏ธ CRI-O ${CRIO_VERSION} repository not found, finding latest available stable version..." + + # Setup GitHub API authentication if available + CURL_AUTH_HEADER="" + if [ -n "${GH_TOKEN:-}" ]; then + CURL_AUTH_HEADER="Authorization: Bearer ${GH_TOKEN}" + fi + + # Get latest stable CRI-O version from GitHub (keep the v prefix) + echo "๐Ÿ” Fetching latest stable CRI-O release from GitHub API..." + if [ -n "${CURL_AUTH_HEADER}" ]; then + API_RESPONSE=$(curl -sSf -H "${CURL_AUTH_HEADER}" https://api.github.com/repos/cri-o/cri-o/releases) + else + API_RESPONSE=$(curl -sSf https://api.github.com/repos/cri-o/cri-o/releases) + fi + + if [ -z "${API_RESPONSE}" ]; then + echo "โŒ Failed to fetch CRI-O releases from GitHub API" + exit 1 + fi + + CRIO_VERSION=$(echo "${API_RESPONSE}" | jq -r '[.[] | select(.prerelease == false)][0].tag_name') + + if [ -z "${CRIO_VERSION}" ]; then + echo "โŒ Failed to find latest stable CRI-O version" + exit 1 + fi + + echo "โ„น๏ธ Using latest available CRI-O version: ${CRIO_VERSION}" + else + echo "โœ… CRI-O ${CRIO_VERSION} repository found" + fi + + # Install dependencies for adding repositories + echo "๐Ÿ“ฆ Installing dependencies..." 
+ sudo apt-get update + sudo apt-get install -y software-properties-common curl + + # Create keyrings directory + sudo mkdir -p /etc/apt/keyrings + + # Add the Kubernetes repository + echo "๐Ÿ“ฆ Adding Kubernetes repository (${KUBERNETES_VERSION})..." + curl -fsSL https://pkgs.k8s.io/core:/stable:/${KUBERNETES_VERSION}/deb/Release.key | \ + sudo gpg --batch --yes --no-tty --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + + echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/${KUBERNETES_VERSION}/deb/ /" | \ + sudo tee /etc/apt/sources.list.d/kubernetes.list + + # Add the CRI-O repository + echo "๐Ÿ“ฆ Adding CRI-O repository (${CRIO_VERSION})..." + curl -fsSL https://download.opensuse.org/repositories/isv:/cri-o:/stable:/${CRIO_VERSION}/deb/Release.key | \ + sudo gpg --batch --yes --no-tty --dearmor -o /etc/apt/keyrings/cri-o-apt-keyring.gpg + + echo "deb [signed-by=/etc/apt/keyrings/cri-o-apt-keyring.gpg] https://download.opensuse.org/repositories/isv:/cri-o:/stable:/${CRIO_VERSION}/deb/ /" | \ + sudo tee /etc/apt/sources.list.d/cri-o.list + + # Install CRI-O + echo "๐Ÿ“ฆ Installing CRI-O and dependencies..." + sudo apt-get update + sudo apt-get install -y cri-o cri-tools + + # Configure default capabilities for CRI-O + # See: https://github.com/kata-containers/kata-containers/issues/8034 + echo "โš™๏ธ Configuring CRI-O default capabilities..." 
+ sudo mkdir -p /etc/crio/crio.conf.d/ + printf '%s\n' \ + '[crio]' \ + 'storage_option = [' \ + ' "overlay.skip_mount_home=true",' \ + ']' \ + '[crio.runtime]' \ + 'default_capabilities = [' \ + ' "CHOWN",' \ + ' "DAC_OVERRIDE",' \ + ' "FSETID",' \ + ' "FOWNER",' \ + ' "SETGID",' \ + ' "SETUID",' \ + ' "SETPCAP",' \ + ' "NET_BIND_SERVICE",' \ + ' "KILL",' \ + ' "SYS_CHROOT",' \ + ']' \ + | sudo tee /etc/crio/crio.conf.d/00-default-capabilities.conf >/dev/null + echo "โœ… CRI-O configuration complete" + else + echo "โŒ Unsupported container runtime: ${RUNTIME}" + exit 1 + fi + + # Start the container runtime + sudo systemctl daemon-reload + sudo systemctl enable --now ${RUNTIME} + sudo systemctl restart ${RUNTIME} + + echo "โœ… ${RUNTIME} installed and running" + + - name: Install kubeadm, kubelet, kubectl + shell: bash + run: | + echo "๐Ÿ“ฆ Installing Kubernetes components..." + + # Get the latest stable Kubernetes version + K8S_VERSION=$(curl -Ls https://dl.k8s.io/release/stable.txt | cut -d. 
-f-2) + + # Add the pkgs.k8s.io repo + curl -fsSL "https://pkgs.k8s.io/core:/stable:/${K8S_VERSION}/deb/Release.key" | \ + sudo gpg --batch --yes --no-tty --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + + echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/${K8S_VERSION}/deb/ /" | \ + sudo tee /etc/apt/sources.list.d/kubernetes.list + + # Pin the packages to ensure they're downloaded from pkgs.k8s.io repo + cat </dev/null; then + echo "containerd version: $(containerd --version)" + fi + if command -v crio >/dev/null; then + echo "crio version: $(crio --version)" + fi diff --git a/.github/actions/setup-k8s-microk8s/action.yaml b/.github/actions/setup-k8s-microk8s/action.yaml new file mode 100644 index 0000000..aa50398 --- /dev/null +++ b/.github/actions/setup-k8s-microk8s/action.yaml @@ -0,0 +1,90 @@ +name: 'Setup MicroK8s Kubernetes' +description: 'Install MicroK8s Kubernetes distribution' +runs: + using: 'composite' + steps: + - name: Free up disk space + shell: bash + run: | + echo "๐Ÿงน Removing unnecessary directories to free up disk space..." + sudo rm -rf /usr/local/.ghcup + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/lib/jvm + sudo rm -rf /usr/share/swift + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/julia* + sudo rm -rf /opt/az + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /opt/microsoft + sudo rm -rf /opt/google + sudo rm -rf /usr/lib/firefox + echo "โœ… Disk space freed up" + df -h / | grep -v Filesystem + + - name: Install MicroK8s + shell: bash + run: | + echo "๐Ÿ“ฆ Installing MicroK8s (latest from stable channel)..." 
+ sudo snap install microk8s --classic --channel=latest/stable + sudo usermod -a -G microk8s "${USER}" + + # Show installed version + MICROK8S_VERSION=$(sudo microk8s version | head -1) + echo "โ„น๏ธ Installed MicroK8s: ${MICROK8S_VERSION}" + + mkdir -p ~/.kube + sudo microk8s kubectl config view --raw > ~/.kube/config + sudo chown "${USER}":"${USER}" ~/.kube/config + + echo "โณ Waiting for MicroK8s to be ready..." + sudo microk8s status --wait-ready --timeout 300 + + - name: Setup kubectl + shell: bash + run: | + echo "๐Ÿ”ง Setting up kubectl..." + + # Install kubectl + ARCH=$(uname -m) + case "${ARCH}" in + x86_64) ARCH="amd64" ;; + aarch64) ARCH="arm64" ;; + esac + + kubectl_version=$(sudo microk8s version | grep -oe 'v[0-9]\+\(\.[0-9]\+\)*') + sudo curl -fL --progress-bar -o /usr/bin/kubectl https://dl.k8s.io/release/"${kubectl_version}"/bin/linux/"${ARCH}"/kubectl + sudo chmod +x /usr/bin/kubectl + sudo rm -rf /usr/local/bin/kubectl + + echo "โœ… kubectl installed: $(kubectl version --client)" + + - name: Verify cluster + shell: bash + run: | + echo "๐Ÿ” Verifying MicroK8s cluster..." + kubectl get nodes + kubectl get pods -A + + # Wait for system pods to be ready (excluding completed/job pods) + echo "โณ Waiting for system pods..." + # Wait only for Running pods (not Completed/Job pods) + kubectl wait --for=condition=Ready pods --all -n kube-system --timeout=5m \ + --field-selector=status.phase!=Succeeded,status.phase!=Failed || true + + # Verify all pods are either Running or Completed + NOT_READY=$(kubectl get pods -n kube-system -o json | \ + jq -r '.items[] | select(.status.phase != "Running" and .status.phase != "Succeeded") | .metadata.name') + + if [ -n "$NOT_READY" ]; then + echo "โŒ Some pods are not ready:" + echo "$NOT_READY" + kubectl get pods -A + exit 1 + fi + + echo "โœ… MicroK8s cluster is ready!" 
diff --git a/.github/actions/setup-k8s-rke2/action.yaml b/.github/actions/setup-k8s-rke2/action.yaml new file mode 100644 index 0000000..96031ca --- /dev/null +++ b/.github/actions/setup-k8s-rke2/action.yaml @@ -0,0 +1,83 @@ +name: 'Setup RKE2 Kubernetes' +description: 'Install RKE2 Kubernetes distribution' +inputs: + extra-params: + description: 'Extra parameters for RKE2' + required: false + default: '' +runs: + using: 'composite' + steps: + - name: Free up disk space + shell: bash + run: | + echo "๐Ÿงน Removing unnecessary directories to free up disk space..." + sudo rm -rf /usr/local/.ghcup + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/lib/jvm + sudo rm -rf /usr/share/swift + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/julia* + sudo rm -rf /opt/az + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /opt/microsoft + sudo rm -rf /opt/google + sudo rm -rf /usr/lib/firefox + echo "โœ… Disk space freed up" + df -h / | grep -v Filesystem + + - name: Install RKE2 + shell: bash + run: | + echo "๐Ÿ“ฆ Installing RKE2..." + curl -sfL https://get.rke2.io | sudo sh - + + sudo systemctl enable --now rke2-server.service + + echo "โณ Waiting for RKE2 to be ready..." + sleep 120 + + - name: Setup kubectl + shell: bash + run: | + echo "๐Ÿ”ง Setting up kubectl..." + + # Link the kubectl binary into /usr/local/bin + sudo ln -sf /var/lib/rancher/rke2/bin/kubectl /usr/local/bin/kubectl + + mkdir -p ~/.kube + sudo cp /etc/rancher/rke2/rke2.yaml ~/.kube/config + sudo chown "${USER}":"${USER}" ~/.kube/config + + echo "โœ… kubectl installed: $(kubectl version --client)" + + - name: Verify cluster + shell: bash + run: | + echo "๐Ÿ” Verifying RKE2 cluster..." 
+ kubectl get nodes + kubectl get pods -A + + # Wait for system pods to be ready (excluding completed/job pods) + echo "โณ Waiting for system pods..." + # Wait only for Running pods (not Completed/Job pods) + kubectl wait --for=condition=Ready pods --all -n kube-system --timeout=5m \ + --field-selector=status.phase!=Succeeded,status.phase!=Failed || true + + # Verify all pods are either Running or Completed + NOT_READY=$(kubectl get pods -n kube-system -o json | \ + jq -r '.items[] | select(.status.phase != "Running" and .status.phase != "Succeeded") | .metadata.name') + + if [ -n "$NOT_READY" ]; then + echo "โŒ Some pods are not ready:" + echo "$NOT_READY" + kubectl get pods -A + exit 1 + fi + + echo "โœ… RKE2 cluster is ready!" diff --git a/.github/actions/verify-deployment/action.yaml b/.github/actions/verify-deployment/action.yaml new file mode 100644 index 0000000..fd43457 --- /dev/null +++ b/.github/actions/verify-deployment/action.yaml @@ -0,0 +1,154 @@ +name: Verify Deployment +description: Verify kata-deploy daemonset and RuntimeClasses are created + +inputs: + namespace: + description: 'Kubernetes namespace where chart is installed' + required: false + default: 'kube-system' + expected-runtime-classes: + description: 'Space-separated list of expected RuntimeClass names' + required: false + default: 'kata-qemu-coco-dev' + daemonset-timeout: + description: 'Timeout for daemonset to become ready' + required: false + default: '15m' + daemonset-label: + description: 'Label selector for kata-deploy daemonset (e.g., name=kata-as-coco-runtime or name=kata-as-coco-runtime-for-ci)' + required: false + default: 'name=kata-as-coco-runtime' + +outputs: + verification-status: + description: 'Overall verification status' + value: ${{ steps.verify.outputs.status }} + +runs: + using: composite + steps: + - name: Wait for kata-deploy daemonset + shell: bash + run: | + echo "โณ Waiting for kata-deploy daemonset to be ready..." 
+ echo " Label selector: ${{ inputs.daemonset-label }}" + echo " Timeout: ${{ inputs.daemonset-timeout }}" + + if kubectl wait --for=condition=ready pod \ + -l ${{ inputs.daemonset-label }} \ + -n ${{ inputs.namespace }} \ + --timeout=${{ inputs.daemonset-timeout }}; then + echo "โœ… DaemonSet pods are ready" + else + echo "โŒ DaemonSet pods failed to become ready" + echo "" + echo "DaemonSet status:" + kubectl get daemonset -n ${{ inputs.namespace }} -l ${{ inputs.daemonset-label }} + echo "" + echo "Pod status:" + kubectl get pods -n ${{ inputs.namespace }} -l ${{ inputs.daemonset-label }} + echo "" + echo "Pod describe:" + kubectl describe pods -n ${{ inputs.namespace }} -l ${{ inputs.daemonset-label }} + exit 1 + fi + + - name: Verify daemonset status + shell: bash + run: | + echo "๐Ÿ” Verifying daemonset status..." + + # Get the daemonset name from the pod's owner reference + POD_NAME=$(kubectl get pods -n ${{ inputs.namespace }} -l ${{ inputs.daemonset-label }} -o jsonpath='{.items[0].metadata.name}') + if [ -z "$POD_NAME" ]; then + echo "โŒ No pods found with label ${{ inputs.daemonset-label }}" + exit 1 + fi + + DS_NAME=$(kubectl get pod "$POD_NAME" -n ${{ inputs.namespace }} -o jsonpath='{.metadata.ownerReferences[0].name}') + if [ -z "$DS_NAME" ]; then + echo "โŒ Could not find DaemonSet name from pod" + exit 1 + fi + + echo "Found DaemonSet: $DS_NAME" + kubectl get daemonset "$DS_NAME" -n ${{ inputs.namespace }} + + # Get desired vs ready count + DESIRED=$(kubectl get daemonset "$DS_NAME" -n ${{ inputs.namespace }} -o jsonpath='{.status.desiredNumberScheduled}') + READY=$(kubectl get daemonset "$DS_NAME" -n ${{ inputs.namespace }} -o jsonpath='{.status.numberReady}') + + echo "Desired: $DESIRED, Ready: $READY" + + if [ "$DESIRED" = "$READY" ] && [ "$READY" != "0" ]; then + echo "โœ… DaemonSet is healthy (${READY}/${DESIRED} pods ready)" + else + echo "โŒ DaemonSet is not healthy (${READY}/${DESIRED} pods ready)" + exit 1 + fi + + - name: Show 
daemonset logs + shell: bash + run: | + echo "๐Ÿ“‹ DaemonSet logs (last 50 lines):" + kubectl logs -n ${{ inputs.namespace }} -l ${{ inputs.daemonset-label }} --tail=50 --prefix=true + + - name: Verify RuntimeClasses + id: verify + shell: bash + run: | + echo "๐Ÿ” Verifying RuntimeClasses..." + echo " Expected: ${{ inputs.expected-runtime-classes }}" + echo " Timeout: 3 minutes" + + # Wait up to 3 minutes for RuntimeClasses to appear + TIMEOUT=180 # 3 minutes + INTERVAL=5 # Check every 5 seconds + ELAPSED=0 + + while [ $ELAPSED -lt $TIMEOUT ]; do + echo "" + echo "โฑ๏ธ Checking RuntimeClasses (elapsed: ${ELAPSED}s / ${TIMEOUT}s)..." + kubectl get runtimeclass 2>/dev/null || echo "No RuntimeClasses found yet" + + ALL_FOUND=true + for rc in ${{ inputs.expected-runtime-classes }}; do + if kubectl get runtimeclass "$rc" >/dev/null 2>&1; then + echo " โœ… Found RuntimeClass: $rc" + else + echo " โณ Waiting for RuntimeClass: $rc" + ALL_FOUND=false + fi + done + + if [ "$ALL_FOUND" = "true" ]; then + echo "" + echo "โœ… All expected RuntimeClasses exist" + echo "status=success" >> $GITHUB_OUTPUT + exit 0 + fi + + sleep $INTERVAL + ELAPSED=$((ELAPSED + INTERVAL)) + done + + # Timeout reached + echo "" + echo "โŒ Timeout: Some RuntimeClasses are still missing after ${TIMEOUT}s" + echo "Final state:" + kubectl get runtimeclass 2>/dev/null || echo "No RuntimeClasses found" + echo "status=failed" >> $GITHUB_OUTPUT + exit 1 + + - name: Show RuntimeClass details + shell: bash + run: | + echo "๐Ÿ“‹ RuntimeClass details:" + for rc in ${{ inputs.expected-runtime-classes }}; do + if kubectl get runtimeclass "$rc" >/dev/null 2>&1; then + echo "" + echo "=== $rc ===" + kubectl get runtimeclass "$rc" -o yaml + fi + done + diff --git a/.github/workflows/chart-lock-check.yaml b/.github/workflows/chart-lock-check.yaml new file mode 100644 index 0000000..60b1f9d --- /dev/null +++ b/.github/workflows/chart-lock-check.yaml @@ -0,0 +1,144 @@ +name: Chart.lock Validation + +on: + 
pull_request: + types: + - edited + - opened + - reopened + - synchronize + +jobs: + validate-chart-lock: + name: Validate Chart.lock + runs-on: ubuntu-22.04 + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check Chart.lock for 0.0.0-dev entries + id: check + run: | + echo "๐Ÿ” Checking Chart.lock for 0.0.0-dev entries..." + + if [ ! -f Chart.lock ]; then + echo "โ„น๏ธ No Chart.lock file found" + echo "has_dev_version=false" >> $GITHUB_OUTPUT + exit 0 + fi + + # Check if Chart.lock contains 0.0.0-dev version + if grep -q "version: 0.0.0-dev" Chart.lock; then + echo "โŒ Found 0.0.0-dev in Chart.lock" + echo "has_dev_version=true" >> $GITHUB_OUTPUT + + echo "" + echo "Problematic entries:" + grep -B2 -A2 "version: 0.0.0-dev" Chart.lock || true + + echo "" + echo "::error::Chart.lock contains 0.0.0-dev entries that must be removed" + + else + echo "โœ… No 0.0.0-dev entries found in Chart.lock" + echo "has_dev_version=false" >> $GITHUB_OUTPUT + fi + + - name: Add comment to PR + if: steps.check.outputs.has_dev_version == 'true' + run: | + gh pr comment ${{ github.event.pull_request.number }} --body "## โŒ Chart.lock Contains 0.0.0-dev Entries + + Found \`0.0.0-dev\` entries in \`Chart.lock\` that must be removed. 
+ + **Why this is a problem:** + The \`kata-as-coco-runtime-for-ci\` dependency uses \`version: 0.0.0-dev\` which is: + - Only for CI testing + - Not a real release + - Should never be committed to Chart.lock + + **How to fix:** + + Manually edit Chart.lock to remove the entire dependency block containing \`version: 0.0.0-dev\`: + + \`\`\`yaml + # Remove this entire block from Chart.lock: + - name: kata-deploy + repository: oci://ghcr.io/kata-containers/kata-deploy-charts + version: 0.0.0-dev + \`\`\` + + Then commit and push: + \`\`\`bash + git add Chart.lock + git commit -m \"fix: Remove 0.0.0-dev from Chart.lock\" + git push + \`\`\` + + **Why not regenerate with helm dependency update?** + Running \`helm dependency update\` would add the 0.0.0-dev entry back. This entry only exists for CI testing and should never be in Chart.lock. + + **Prevention:** + Chart.lock is managed by CI/CD workflows. Don't manually run \`helm dependency update\` - the prepare-release script handles updates correctly." + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Fail if 0.0.0-dev found + if: steps.check.outputs.has_dev_version == 'true' + run: | + echo "::error::Chart.lock contains 0.0.0-dev entries that must be removed" + exit 1 + + - name: Summary + if: always() + run: | + if [ "${{ steps.check.outputs.has_dev_version }}" = "true" ]; then + cat >> $GITHUB_STEP_SUMMARY << 'EOF' + ## โŒ Chart.lock Validation Failed + + Found `0.0.0-dev` entries in Chart.lock that must be removed. 
+ + ### Why this is a problem + + The `kata-as-coco-runtime-for-ci` dependency uses `version: 0.0.0-dev` which is: + - Only used for CI testing + - Not a real release + - Should never be committed to Chart.lock + + ### How to fix + + Manually edit Chart.lock to remove the entire dependency block containing `version: 0.0.0-dev`: + + ```yaml + # Remove this entire block: + - name: kata-deploy + repository: oci://ghcr.io/kata-containers/kata-deploy-charts + version: 0.0.0-dev + ``` + + Then commit: + ```bash + git add Chart.lock + git commit -m "fix: Remove 0.0.0-dev from Chart.lock" + git push + ``` + + ### Why not regenerate? + + **Don't run `helm dependency update`** - it will add 0.0.0-dev back. + This entry only exists for CI testing. + + ### Prevention + + - Chart.lock is managed by CI/CD workflows + - The prepare-release script handles updates correctly + - Don't manually run `helm dependency update` + EOF + else + echo "## โœ… Chart.lock Validated" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "No \`0.0.0-dev\` entries found in Chart.lock." >> $GITHUB_STEP_SUMMARY + fi + diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml new file mode 100644 index 0000000..ce78fda --- /dev/null +++ b/.github/workflows/e2e-tests.yaml @@ -0,0 +1,507 @@ +name: E2E Tests + +on: + pull_request: + types: + - edited + - opened + - reopened + - synchronize + workflow_dispatch: + +jobs: + check-changes: + name: Check What Changed + runs-on: ubuntu-24.04 + outputs: + kata-deploy-version-changed: ${{ steps.check-appversion.outputs.changed }} + templates-changed: ${{ steps.check-templates.outputs.changed }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check if kata-deploy dependency version changed + id: check-appversion + run: | + echo "๐Ÿ” Checking if production kata-deploy dependency version in Chart.yaml changed..." 
+ + # For workflow_dispatch, always run (set to true) + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "changed=true" >> $GITHUB_OUTPUT + echo "โ„น๏ธ Manual trigger - will run all tests" + exit 0 + fi + + # Get the base branch for PR + BASE_SHA="${{ github.event.pull_request.base.sha }}" + + # Check if the production kata-deploy version changed (kata-as-coco-runtime) + # Specifically look for the kata-deploy dependency WITHOUT the "-for-ci" alias + # We extract the section between "name: kata-deploy" and the next dependency or end + # and check if the version line changed, excluding the CI variant (0.0.0-dev) + + if git diff "${BASE_SHA}" HEAD -- Chart.yaml | \ + grep -B1 -A3 'alias: kata-as-coco-runtime$' | \ + grep -E '^\+.*version:' | \ + grep -v '0.0.0-dev' || \ + git diff "${BASE_SHA}" HEAD -- Chart.yaml | \ + grep -B1 -A3 'alias: kata-as-coco-runtime$' | \ + grep -E '^\-.*version:' | \ + grep -v '0.0.0-dev'; then + echo "changed=true" >> $GITHUB_OUTPUT + echo "โœ… Production kata-deploy version changed - will run standard deployment test" + else + echo "changed=false" >> $GITHUB_OUTPUT + echo "โ„น๏ธ Production kata-deploy version unchanged - will skip standard deployment test" + fi + + - name: Check if templates changed + id: check-templates + run: | + echo "๐Ÿ” Checking if templates/values changed..." 
+ + # For workflow_dispatch, always run (set to true) + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "changed=true" >> $GITHUB_OUTPUT + echo "โ„น๏ธ Manual trigger - will run all tests" + exit 0 + fi + + BASE_SHA="${{ github.event.pull_request.base.sha }}" + + if git diff --name-only "${BASE_SHA}" HEAD | grep -E '(templates/|values\.yaml|values/|Chart\.yaml)'; then + echo "changed=true" >> $GITHUB_OUTPUT + echo "โœ… Templates/values changed - will run all E2E tests" + else + echo "changed=false" >> $GITHUB_OUTPUT + echo "โ„น๏ธ No template/values changes - will skip template-dependent tests" + fi + + e2e-tests: + name: E2E (${{ matrix.deployment-type }} / ${{ matrix.k8s-distro }}${{ matrix.image-pull-mode && format(' / {0}', matrix.image-pull-mode) || '' }}) + runs-on: ubuntu-24.04 + needs: check-changes + if: needs.check-changes.outputs.templates-changed == 'true' || needs.check-changes.outputs.kata-deploy-version-changed == 'true' + timeout-minutes: 45 + strategy: + fail-fast: false + matrix: + include: + # CI Variant - K3s with both pull modes + - deployment-type: ci + k8s-distro: k3s + image-pull-mode: nydus + - deployment-type: ci + k8s-distro: k3s + image-pull-mode: experimental-force-guest-pull + # CI Variant - K0s with both pull modes + - deployment-type: ci + k8s-distro: k0s + image-pull-mode: nydus + - deployment-type: ci + k8s-distro: k0s + image-pull-mode: experimental-force-guest-pull + # CI Variant - RKE2 with both pull modes + - deployment-type: ci + k8s-distro: rke2 + image-pull-mode: nydus + - deployment-type: ci + k8s-distro: rke2 + image-pull-mode: experimental-force-guest-pull + # CI Variant - MicroK8s with both pull modes + - deployment-type: ci + k8s-distro: microk8s + image-pull-mode: nydus + - deployment-type: ci + k8s-distro: microk8s + image-pull-mode: experimental-force-guest-pull + # CI Variant - Kubeadm with containerd latest - both pull modes + - deployment-type: ci + k8s-distro: kubeadm-containerd-latest + 
image-pull-mode: nydus + - deployment-type: ci + k8s-distro: kubeadm-containerd-latest + image-pull-mode: experimental-force-guest-pull + # CI Variant - Kubeadm with containerd 1.7 - both pull modes + - deployment-type: ci + k8s-distro: kubeadm-containerd-1.7 + image-pull-mode: nydus + - deployment-type: ci + k8s-distro: kubeadm-containerd-1.7 + image-pull-mode: experimental-force-guest-pull + ## CI Variant - Kubeadm with CRI-O + #- deployment-type: ci + # k8s-distro: kubeadm-crio + + # Standard Deployment - K3s with both pull modes + - deployment-type: standard + k8s-distro: k3s + image-pull-mode: nydus + - deployment-type: standard + k8s-distro: k3s + image-pull-mode: experimental-force-guest-pull + # Standard Deployment - K0s with both pull modes + - deployment-type: standard + k8s-distro: k0s + image-pull-mode: nydus + - deployment-type: standard + k8s-distro: k0s + image-pull-mode: experimental-force-guest-pull + # Standard Deployment - RKE2 with both pull modes + - deployment-type: standard + k8s-distro: rke2 + image-pull-mode: nydus + - deployment-type: standard + k8s-distro: rke2 + image-pull-mode: experimental-force-guest-pull + # Standard Deployment - MicroK8s with both pull modes + - deployment-type: standard + k8s-distro: microk8s + image-pull-mode: nydus + - deployment-type: standard + k8s-distro: microk8s + image-pull-mode: experimental-force-guest-pull + # Standard Deployment - Kubeadm with containerd latest - both pull modes + - deployment-type: standard + k8s-distro: kubeadm-containerd-latest + image-pull-mode: nydus + - deployment-type: standard + k8s-distro: kubeadm-containerd-latest + image-pull-mode: experimental-force-guest-pull + # Standard Deployment - Kubeadm with containerd 1.7 - both pull modes + - deployment-type: standard + k8s-distro: kubeadm-containerd-1.7 + image-pull-mode: nydus + - deployment-type: standard + k8s-distro: kubeadm-containerd-1.7 + image-pull-mode: experimental-force-guest-pull + ## Standard Deployment - Kubeadm with 
CRI-O + #- deployment-type: standard + # k8s-distro: kubeadm-crio + + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: Check if this matrix entry should run + id: should-run + run: | + # CI tests run only when templates changed + # Standard tests run only when kata-deploy version changed + if [ "${{ matrix.deployment-type }}" = "ci" ] && [ "${{ needs.check-changes.outputs.templates-changed }}" = "true" ]; then + echo "should-run=true" >> $GITHUB_OUTPUT + echo "โœ… CI test will run (templates changed)" + elif [ "${{ matrix.deployment-type }}" = "standard" ] && [ "${{ needs.check-changes.outputs.kata-deploy-version-changed }}" = "true" ]; then + echo "should-run=true" >> $GITHUB_OUTPUT + echo "โœ… Standard test will run (kata-deploy version changed)" + else + echo "should-run=false" >> $GITHUB_OUTPUT + echo "โญ๏ธ Skipping this test (conditions not met)" + fi + + - name: Checkout code + if: steps.should-run.outputs.should-run == 'true' + uses: actions/checkout@v4 + + - name: Setup Kubernetes cluster (k3s) + if: steps.should-run.outputs.should-run == 'true' && matrix.k8s-distro == 'k3s' + uses: ./.github/actions/setup-k8s-k3s + + - name: Setup Kubernetes cluster (k0s) + if: steps.should-run.outputs.should-run == 'true' && matrix.k8s-distro == 'k0s' + uses: ./.github/actions/setup-k8s-k0s + + - name: Setup Kubernetes cluster (rke2) + if: steps.should-run.outputs.should-run == 'true' && matrix.k8s-distro == 'rke2' + uses: ./.github/actions/setup-k8s-rke2 + + - name: Setup Kubernetes cluster (microk8s) + if: steps.should-run.outputs.should-run == 'true' && matrix.k8s-distro == 'microk8s' + uses: ./.github/actions/setup-k8s-microk8s + + - name: Setup Kubernetes cluster (kubeadm with containerd latest) + if: steps.should-run.outputs.should-run == 'true' && matrix.k8s-distro == 'kubeadm-containerd-latest' + uses: ./.github/actions/setup-k8s-kubeadm + with: + container-runtime: containerd + runtime-version: latest + + - name: Setup Kubernetes cluster 
(kubeadm with containerd 1.7) + if: steps.should-run.outputs.should-run == 'true' && matrix.k8s-distro == 'kubeadm-containerd-1.7' + uses: ./.github/actions/setup-k8s-kubeadm + with: + container-runtime: containerd + runtime-version: "1.7" + + - name: Setup Kubernetes cluster (kubeadm with CRI-O) + if: steps.should-run.outputs.should-run == 'true' && matrix.k8s-distro == 'kubeadm-crio' + uses: ./.github/actions/setup-k8s-kubeadm + with: + container-runtime: crio + runtime-version: latest + + - name: Determine k8s distribution name + if: steps.should-run.outputs.should-run == 'true' + id: k8s-distro-name + run: | + case "${{ matrix.k8s-distro }}" in + k3s) echo "name=k3s" >> $GITHUB_OUTPUT ;; + k0s) echo "name=k0s" >> $GITHUB_OUTPUT ;; + rke2) echo "name=rke2" >> $GITHUB_OUTPUT ;; + microk8s) echo "name=microk8s" >> $GITHUB_OUTPUT ;; + kubeadm-containerd-latest) echo "name=k8s" >> $GITHUB_OUTPUT ;; + kubeadm-containerd-1.7) echo "name=k8s" >> $GITHUB_OUTPUT ;; + kubeadm-crio) echo "name=k8s" >> $GITHUB_OUTPUT ;; + *) echo "name=k8s" >> $GITHUB_OUTPUT ;; + esac + + - name: Setup Helm + if: steps.should-run.outputs.should-run == 'true' + uses: azure/setup-helm@v4 + with: + version: '3.13.1' + + - name: Update Helm dependencies + if: steps.should-run.outputs.should-run == 'true' + run: | + echo "๐Ÿ“ฆ Updating Helm chart dependencies..." 
+ helm dependency update + echo "โœ… Dependencies updated" + + - name: Prepare Helm extra args + if: steps.should-run.outputs.should-run == 'true' + id: helm-args + run: | + # Determine which chart variant to use based on deployment type + if [ "${{ matrix.deployment-type }}" = "ci" ]; then + CHART_VARIANT="kata-as-coco-runtime-for-ci" + ARGS="--set kata-as-coco-runtime.enabled=false --set ${CHART_VARIANT}.enabled=true" + else + CHART_VARIANT="kata-as-coco-runtime" + ARGS="--set ${CHART_VARIANT}.enabled=true" + fi + + # Add k8s distribution + ARGS="${ARGS} --set ${CHART_VARIANT}.k8sDistribution=${{ steps.k8s-distro-name.outputs.name }}" + + # Add image pull mode specific flags (only for CI, standard uses defaults) + if [ "${{ matrix.deployment-type }}" = "ci" ] && [ "${{ matrix.image-pull-mode }}" = "experimental-force-guest-pull" ]; then + ARGS="${ARGS} --set ${CHART_VARIANT}.env.snapshotterHandlerMapping='' --set ${CHART_VARIANT}.env.pullTypeMapping=\"\" --set ${CHART_VARIANT}.env._experimentalSetupSnapshotter=\"\" --set ${CHART_VARIANT}.env._experimentalForceGuestPull=qemu-coco-dev" + fi + + echo "args=${ARGS}" >> $GITHUB_OUTPUT + + - name: Determine deployment parameters + if: steps.should-run.outputs.should-run == 'true' + id: deployment-params + run: | + if [ "${{ matrix.deployment-type }}" = "ci" ]; then + echo "release-name=coco-ci" >> $GITHUB_OUTPUT + echo "daemonset-label=name=kata-as-coco-runtime-for-ci" >> $GITHUB_OUTPUT + echo "expected-runtimeclasses=kata-qemu-coco-dev" >> $GITHUB_OUTPUT + echo "test-pod-name=kata-ci-test-pod" >> $GITHUB_OUTPUT + else + echo "release-name=coco" >> $GITHUB_OUTPUT + echo "daemonset-label=name=kata-as-coco-runtime" >> $GITHUB_OUTPUT + echo "expected-runtimeclasses=kata-qemu-coco-dev kata-qemu-snp kata-qemu-tdx" >> $GITHUB_OUTPUT + echo "test-pod-name=kata-standard-test-pod" >> $GITHUB_OUTPUT + fi + + - name: Install chart + if: steps.should-run.outputs.should-run == 'true' + uses: ./.github/actions/install-chart + with: 
+        release-name: ${{ steps.deployment-params.outputs.release-name }}
+        namespace: kube-system
+        extra-args: ${{ steps.helm-args.outputs.args }}
+        wait-timeout: 15m
+
+    - name: Verify deployment
+      if: steps.should-run.outputs.should-run == 'true'
+      uses: ./.github/actions/verify-deployment
+      with:
+        namespace: kube-system
+        expected-runtime-classes: ${{ steps.deployment-params.outputs.expected-runtimeclasses }}
+        daemonset-timeout: 15m
+        daemonset-label: ${{ steps.deployment-params.outputs.daemonset-label }}
+
+    - name: Run test pod
+      if: steps.should-run.outputs.should-run == 'true'
+      uses: ./.github/actions/run-test-pod
+      with:
+        runtime-class: kata-qemu-coco-dev
+        namespace: default
+        pod-name: ${{ steps.deployment-params.outputs.test-pod-name }}
+        timeout: 5m
+
+    - name: Collect logs on failure
+      if: failure()
+      run: |
+        echo "๐Ÿ“‹ Collecting diagnostic information..."
+
+        echo "=== Helm releases ==="
+        helm list -A
+
+        echo ""
+        echo "=== All pods ==="
+        kubectl get pods -A
+
+        echo ""
+        echo "=== DaemonSet status ==="
+        kubectl get daemonset -n kube-system
+
+        echo ""
+        echo "=== RuntimeClasses ==="
+        kubectl get runtimeclass
+
+        echo ""
+        echo "=== Kata Containers Configuration ==="
+        RUNTIME_CLASS="kata-qemu-coco-dev"
+        # Extract the configuration file name from RuntimeClass (e.g., kata-qemu-coco-dev -> qemu-coco-dev)
+        CONFIG_NAME="${RUNTIME_CLASS#kata-}"
+
+        # Get installation prefix from Helm values (defaults)
+        INSTALL_PREFIX="/opt/kata"
+
+        CONFIG_FILE="${INSTALL_PREFIX}/share/defaults/kata-containers/configuration-${CONFIG_NAME}.toml"
+
+        echo "RuntimeClass: ${RUNTIME_CLASS}"
+        echo "Configuration file: ${CONFIG_FILE}"
+        echo ""
+
+        # Get node name
+        NODE=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')
+        echo "Reading from node: ${NODE}"
+        echo ""
+
+        # Create a temporary pod to read the configuration file
+        POD_NAME="kata-config-reader-$(date +%s)"
+        # NOTE(review): original text was garbled to `cat </dev/null` with a dangling
+        # manifest and EOF; reconstructed as a heredoc piped to `kubectl apply`.
+        cat <<EOF | kubectl apply -f - >/dev/null
+        apiVersion: v1
+        kind: Pod
+        metadata:
+          name: ${POD_NAME}
+          namespace: default
+        spec:
+          hostPID: true
+          hostNetwork: true
+          nodeName: ${NODE}
+          containers:
+          - name: reader
+            image: busybox:latest
+            command: ['sleep', '60']
+            volumeMounts:
+            - name: host-root
+              mountPath: /host
+              readOnly: true
+            securityContext:
+              privileged: true
+          volumes:
+          - name: host-root
+            hostPath:
+              path: /
+          restartPolicy: Never
+        EOF
+
+        # Wait for pod to be ready
+        echo "Waiting for reader pod..."
+        kubectl wait --for=condition=Ready pod/${POD_NAME} -n default --timeout=30s >/dev/null 2>&1 || true
+        sleep 2
+
+        # Read the configuration file
+        echo "--- ${CONFIG_FILE} ---"
+        if kubectl exec ${POD_NAME} -n default -- cat /host${CONFIG_FILE} 2>/dev/null; then
+          echo ""
+          echo "โœ… Configuration file displayed successfully"
+        else
+          echo ""
+          echo "โŒ Failed to read configuration file"
+          echo ""
+          echo "Attempting to list available configurations:"
+          kubectl exec ${POD_NAME} -n default -- ls -la /host${INSTALL_PREFIX}/share/defaults/kata-containers/ 2>/dev/null || echo "Failed to list directory"
+        fi
+
+        # Cleanup
+        kubectl delete pod ${POD_NAME} -n default --ignore-not-found=true >/dev/null 2>&1 || true
+
+        echo ""
+        echo "=== kata-deploy logs ==="
+        kubectl logs -n kube-system -l ${{ steps.deployment-params.outputs.daemonset-label }} --tail=200 --prefix=true || echo "No logs available"
+
+        echo ""
+        echo "=== Events ==="
+        kubectl get events -A --sort-by='.lastTimestamp' | tail -50
+
+    - name: Uninstall chart
+      if: always()
+      run: |
+        RELEASE_NAME="${{ steps.deployment-params.outputs.release-name }}"
+
+        # Check if release name was set (i.e., test actually ran)
+        if [ -z "${RELEASE_NAME}" ]; then
+          echo "โญ๏ธ No chart to uninstall (test was skipped)"
+          exit 0
+        fi
+
+        echo "๐Ÿ—‘๏ธ Uninstalling chart: ${RELEASE_NAME}..."
+        helm uninstall ${RELEASE_NAME} -n kube-system --wait --timeout 5m || true
+
+        echo "๐Ÿ” Verifying cleanup..."
+ echo "Remaining pods:" + kubectl get pods -n kube-system -l app.kubernetes.io/instance=${RELEASE_NAME} || echo "No pods found" + + echo "Remaining RuntimeClasses:" + kubectl get runtimeclass || echo "No RuntimeClasses found" + + + test-summary: + name: E2E Test Summary + runs-on: ubuntu-24.04 + needs: [e2e-tests] + if: always() + steps: + - name: Generate summary + run: | + echo "# E2E Test Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + if [ "${{ needs.e2e-tests.result }}" = "success" ]; then + echo "โœ… E2E Tests (CI + Standard): **PASSED**" >> $GITHUB_STEP_SUMMARY + elif [ "${{ needs.e2e-tests.result }}" = "skipped" ]; then + echo "โญ๏ธ E2E Tests: **SKIPPED** (no changes triggering tests)" >> $GITHUB_STEP_SUMMARY + else + echo "โŒ E2E Tests: **FAILED**" >> $GITHUB_STEP_SUMMARY + fi + + echo "" >> $GITHUB_STEP_SUMMARY + echo "## Test Coverage" >> $GITHUB_STEP_SUMMARY + echo "### K8s Distributions" >> $GITHUB_STEP_SUMMARY + echo "- โœ… k3s (nydus-snapshotter & experimental_force_guest_pull)" >> $GITHUB_STEP_SUMMARY + echo "- โœ… k0s (nydus-snapshotter & experimental_force_guest_pull)" >> $GITHUB_STEP_SUMMARY + echo "- โœ… rke2 (nydus-snapshotter & experimental_force_guest_pull)" >> $GITHUB_STEP_SUMMARY + echo "- โœ… microk8s (nydus-snapshotter & experimental_force_guest_pull)" >> $GITHUB_STEP_SUMMARY + echo "- โœ… kubeadm with containerd 1.7 (nydus-snapshotter & experimental_force_guest_pull)" >> $GITHUB_STEP_SUMMARY + echo "- โœ… kubeadm with latest containerd (nydus-snapshotter & experimental_force_guest_pull)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Features Tested" >> $GITHUB_STEP_SUMMARY + echo "- โœ… Helm chart installation" >> $GITHUB_STEP_SUMMARY + echo "- โœ… kata-deploy daemonset deployment" >> $GITHUB_STEP_SUMMARY + echo "- โœ… RuntimeClass creation" >> $GITHUB_STEP_SUMMARY + echo "- โœ… Pod scheduling with Kata runtime" >> $GITHUB_STEP_SUMMARY + echo "- โœ… Image pulling with containerd (nydus 
snapshotter)" >> $GITHUB_STEP_SUMMARY + echo "- โœ… Image pulling with containerd (guest-pull mode)" >> $GITHUB_STEP_SUMMARY + #echo "- โœ… Image pulling with CRI-O" >> $GITHUB_STEP_SUMMARY + echo "- โœ… CI variant (kata-containers-latest)" >> $GITHUB_STEP_SUMMARY + echo "- โœ… Standard deployment (CoCo releases)" >> $GITHUB_STEP_SUMMARY + + - name: Check overall tests status + run: | + # Check for failures (but allow skipped tests) + if [ "${{ needs.e2e-tests.result }}" = "failure" ]; then + echo "โŒ E2E tests failed" + exit 1 + fi + echo "โœ… All E2E tests passed or were skipped as expected" + diff --git a/Chart.lock b/Chart.lock index 2c9600d..844655d 100644 --- a/Chart.lock +++ b/Chart.lock @@ -2,5 +2,5 @@ dependencies: - name: kata-deploy repository: oci://ghcr.io/kata-containers/kata-deploy-charts version: 3.21.0 -digest: sha256:b977d4d978dba0ba373655e26101006178d8e5a63fec3f7cf9a79529d8e59684 -generated: "2025-10-24T16:35:30.992306339+02:00" +digest: sha256:65eb9c198f907fb4eb18a8b8679b3ec624c34008516ada467382463baab6a5c0 +generated: "2025-10-24T17:10:59.456732835+02:00" diff --git a/Chart.yaml b/Chart.yaml index 276944e..5ffda97 100644 --- a/Chart.yaml +++ b/Chart.yaml @@ -18,3 +18,8 @@ dependencies: version: "3.21.0" repository: "oci://ghcr.io/kata-containers/kata-deploy-charts" condition: kata-as-coco-runtime.enabled + - name: kata-deploy + alias: kata-as-coco-runtime-for-ci + version: "0.0.0-dev" + repository: "oci://ghcr.io/kata-containers/kata-deploy-charts" + condition: kata-as-coco-runtime-for-ci.enabled diff --git a/values.yaml b/values.yaml index 639610e..a23c10e 100644 --- a/values.yaml +++ b/values.yaml @@ -65,3 +65,16 @@ kata-as-coco-runtime: # - values/kata-aarch64.yaml for ARM64 # - values/kata-remote.yaml for peer-pods <<: *x86_64_shims + +# Optional: CI variant using upstream kata-containers-latest +# Disabled by default. Uses same shims as CoCo runtime but different image. 
+# Enable with: --set kata-as-coco-runtime-for-ci.enabled=true +kata-as-coco-runtime-for-ci: + enabled: false + <<: *commonConfig + + env: + debug: "true" + <<: *x86_64_shims + _experimentalSetupSnapshotter: "nydus" + _experimentalForceGuestPull: "" \ No newline at end of file diff --git a/values/kata-aarch64.yaml b/values/kata-aarch64.yaml index 9a05097..75ff1c7 100644 --- a/values/kata-aarch64.yaml +++ b/values/kata-aarch64.yaml @@ -22,3 +22,13 @@ kata-as-coco-runtime: env: debug: "false" <<: *aarch64_shims + +kata-as-coco-runtime-for-ci: + <<: *commonConfig + + env: + debug: "true" + <<: *aarch64_shims + _experimentalSetupSnapshotter: "nydus" + _experimentalForceGuestPull: "" + diff --git a/values/kata-remote.yaml b/values/kata-remote.yaml index a13540a..9b6a186 100644 --- a/values/kata-remote.yaml +++ b/values/kata-remote.yaml @@ -20,3 +20,12 @@ kata-as-coco-runtime: env: debug: "false" <<: *remote_shims + +kata-as-coco-runtime-for-ci: + <<: *commonConfig + + env: + debug: "true" + <<: *remote_shims + _experimentalSetupSnapshotter: "nydus" + _experimentalForceGuestPull: "" \ No newline at end of file diff --git a/values/kata-s390x.yaml b/values/kata-s390x.yaml index d3e118f..9023a8b 100644 --- a/values/kata-s390x.yaml +++ b/values/kata-s390x.yaml @@ -22,3 +22,12 @@ kata-as-coco-runtime: env: debug: "false" <<: *s390x_shims + +kata-as-coco-runtime-for-ci: + <<: *commonConfig + + env: + debug: "true" + <<: *s390x_shims + _experimentalSetupSnapshotter: "nydus" + _experimentalForceGuestPull: "" \ No newline at end of file