Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 15 additions & 30 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ jobs:
name: Build & Run E2E Images
runs-on: [self-hosted, linux, X64, jammy, xlarge]
steps:
-
name: Login to GitHub Container Registry
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
# We run into rate limiting issues if we don't authenticate
Expand All @@ -40,28 +39,16 @@ jobs:
sudo snap install kubectl --classic --channel=1.32/stable
- name: Build provider images
run: sudo env "PATH=$PATH" make docker-build-e2e
- name: Build k8s-snap images
working-directory: hack/
run: |
./build-e2e-images.sh
- name: Save provider image
run: |
sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev
sudo chmod 775 provider-images.tar
- name: Save k8s-snap image
run: |
sudo docker save -o k8s-snap-image-old.tar k8s-snap:dev-old
sudo docker save -o k8s-snap-image-new.tar k8s-snap:dev-new
sudo chmod 775 k8s-snap-image-old.tar
sudo chmod 775 k8s-snap-image-new.tar
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: e2e-images
path: |
provider-images.tar
k8s-snap-image-old.tar
k8s-snap-image-new.tar

run-e2e-tests:
name: Run E2E Tests
Expand All @@ -80,8 +67,7 @@ jobs:
# TODO(ben): Remove once all tests are running stable.
fail-fast: false
steps:
-
name: Login to GitHub Container Registry
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
# We run into rate limiting issues if we don't authenticate
Expand All @@ -94,6 +80,13 @@ jobs:
uses: actions/setup-go@v5
with:
go-version-file: go.mod
- name: Setup LXD
uses: canonical/[email protected]
with:
bridges: "lxdbr0"
- name: Configure LXD
run: |
sudo ./hack/setup-lxd.sh
- name: Install requirements
run: |
sudo apt update
Expand All @@ -105,31 +98,23 @@ jobs:
with:
name: e2e-images
path: .
- name: Load provider image
run: sudo docker load -i provider-images.tar
- name: Load k8s-snap old image
run: |
sudo docker load -i k8s-snap-image-old.tar
- name: Load k8s-snap new image
if: matrix.ginkgo_focus == 'Workload cluster upgrade'
- name: Setup bootstrap cluster
run: |
sudo docker load -i k8s-snap-image-new.tar
sudo ./hack/setup-bootstrap-cluster.sh bootstrap-cluster 1.32 ./provider-images.tar
- name: Create docker network
run: |
sudo docker network create kind --driver=bridge -o com.docker.network.bridge.enable_ip_masquerade=true
- name: Increase inotify watches
run: |
# Prevents https://cluster-api.sigs.k8s.io/user/troubleshooting#cluster-api-with-docker----too-many-open-files
sudo sysctl fs.inotify.max_user_watches=1048576
sudo sysctl fs.inotify.max_user_instances=8192
- name: Setup tmate session
uses: canonical/action-tmate@main
if: ${{ github.event_name == 'workflow_dispatch' && inputs.tmate_enabled }}
with:
detached: true
- name: Run e2e tests
run: |
sudo env "PATH=$PATH" GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" SKIP_RESOURCE_CLEANUP=true make test-e2e
sudo env "PATH=$PATH" GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" \
SKIP_RESOURCE_CLEANUP=true \
USE_EXISTING_CLUSTER=true \
make test-e2e
- name: Change artifact permissions
if: always()
run: |
Expand Down
9 changes: 7 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,12 @@ GINKGO_NODES ?= 1 # GINKGO_NODES is the number of parallel nodes to run
GINKGO_TIMEOUT ?= 2h
GINKGO_POLL_PROGRESS_AFTER ?= 60m
GINKGO_POLL_PROGRESS_INTERVAL ?= 5m
E2E_INFRA ?= docker
E2E_INFRA ?= incus
E2E_CONF_FILE ?= $(TEST_DIR)/e2e/config/ck8s-$(E2E_INFRA).yaml
SKIP_RESOURCE_CLEANUP ?= false
USE_EXISTING_CLUSTER ?= false
# EXISTING_CLUSTER_KUBECONFIG_PATH ?= $(HOME)/.kube/config
# PROVIDER_IMAGES_TAR_PATH ?= $(shell pwd)/provider-images.tar
GINKGO_NOCOLOR ?= false

# to set multiple ginkgo skip flags, if any
Expand Down Expand Up @@ -287,7 +289,10 @@ test-e2e: $(GINKGO) $(KUSTOMIZE) ## Run the end-to-end tests
--output-dir="$(ARTIFACTS)" --junit-report="junit.e2e_suite.1.xml" $(GINKGO_ARGS) $(TEST_DIR)/e2e -- \
-e2e.artifacts-folder="$(ARTIFACTS)" \
-e2e.config="$(E2E_CONF_FILE)" \
-e2e.skip-resource-cleanup=$(SKIP_RESOURCE_CLEANUP) -e2e.use-existing-cluster=$(USE_EXISTING_CLUSTER)
-e2e.skip-resource-cleanup=$(SKIP_RESOURCE_CLEANUP) \
-e2e.use-existing-cluster=$(USE_EXISTING_CLUSTER)
# -e2e.existing-cluster-kubeconfig-path=$(EXISTING_CLUSTER_KUBECONFIG_PATH) \
# -e2e.provider-images-tar-path=$(PROVIDER_IMAGES_TAR_PATH)

# Build manager binary
manager-controlplane: generate-controlplane
Expand Down
129 changes: 129 additions & 0 deletions hack/setup-bootstrap-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/bin/bash
# Copyright 2022 The Tinkerbell Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -o errexit
set -o nounset
set -o pipefail

# Retry a command a fixed number of times, pausing between attempts.
# Usage: retry <max_attempts> <delay_seconds> <command...>
# Returns 0 as soon as the command succeeds, 1 once every attempt has failed.
retry() {
    local tries=$1
    local pause=$2
    shift 2

    local n
    for (( n = 1; n <= tries; n++ )); do
        # Run the command directly; a failure inside the && list does not
        # trip errexit, so we get to decide whether to retry.
        "$@" && return 0

        if (( n < tries )); then
            echo " Attempt $n failed. Retrying in ${pause}s..."
            sleep $pause
        else
            echo " All $tries attempts failed."
        fi
    done

    return 1
}

# Description:
#   Set up the bootstrap (management) cluster inside an LXD container.
# Usage:
#   ./hack/setup-bootstrap-cluster.sh <bootstrap-cluster-name> <k8s-version> <provider-images-path>
# Example:
#   ./hack/setup-bootstrap-cluster.sh bootstrap-cluster 1.32 provider-images.tar

# All three positional arguments are mandatory; fail fast with a usage hint
# if any is missing. ${N:-} keeps `set -o nounset` from aborting the check.
if [ -z "${1:-}" ]; then
echo "Error: bootstrap-cluster-name is required"
echo "Usage: $0 <bootstrap-cluster-name> <k8s-version> <provider-images-path>"
exit 1
fi

if [ -z "${2:-}" ]; then
echo "Error: k8s-version is required"
echo "Usage: $0 <bootstrap-cluster-name> <k8s-version> <provider-images-path>"
exit 1
fi

if [ -z "${3:-}" ]; then
echo "Error: provider-images-path is required"
echo "Usage: $0 <bootstrap-cluster-name> <k8s-version> <provider-images-path>"
exit 1
fi

# Name of the LXD container that hosts the bootstrap cluster.
bootstrap_cluster_name=${1}
# k8s snap track to install, e.g. "1.32" (expanded to <track>-classic/stable below).
bootstrap_cluster_version=${2}
# Path to the docker-save tarball of CAPI provider images to preload.
provider_images_path=${3}

echo "==> Launching LXD container '$bootstrap_cluster_name' with Ubuntu 24.04..."
# The --profile/-p flags are options of the `launch` subcommand, so they must
# come after it; `lxc -p ... launch` passes them to the top-level client,
# which does not accept them.
sudo lxc launch ubuntu:24.04 "$bootstrap_cluster_name" -p default -p k8s-integration

echo "==> Installing k8s snap (version $bootstrap_cluster_version)..."
# Snap installs can fail transiently right after boot; retry a few times.
retry 5 5 sudo lxc exec "$bootstrap_cluster_name" -- snap install k8s --classic --channel="$bootstrap_cluster_version-classic/stable"

echo "==> Bootstrapping k8s cluster..."
retry 5 5 sudo lxc exec "$bootstrap_cluster_name" -- k8s bootstrap

echo "==> Pushing provider images to container..."
sudo lxc file push "$provider_images_path" "$bootstrap_cluster_name/root/provider-images.tar"

echo "==> Loading provider images into containerd..."
sudo lxc exec "$bootstrap_cluster_name" -- /snap/k8s/current/bin/ctr -n k8s.io images import /root/provider-images.tar

echo "==> Getting bootstrap cluster IP address..."
bootstrap_cluster_ip=$(sudo lxc exec "$bootstrap_cluster_name" -- bash -c "ip -4 addr show eth0 | grep -oP '(?<=inet\s)\d+(\.\d+){3}'")
echo " Bootstrap cluster IP: $bootstrap_cluster_ip"

echo "==> Creating cluster-info configmap..."
# Minimal kubeconfig that only advertises the API server endpoint; it is
# published in the kube-public/cluster-info configmap for discovery.
kubeconfig="apiVersion: v1
clusters:
- cluster:
    server: https://${bootstrap_cluster_ip}:6443
  name: \"\"
contexts: null
current-context: \"\"
kind: Config
users: null"

echo " Creating temporary kubeconfig file..."
temp_kubeconfig=$(mktemp)
echo "$kubeconfig" > "$temp_kubeconfig"

echo " Pushing kubeconfig $temp_kubeconfig to container at /tmp/$bootstrap_cluster_name-cluster-info.yaml..."
sudo lxc file push "$temp_kubeconfig" "$bootstrap_cluster_name/tmp/$bootstrap_cluster_name-cluster-info.yaml"

echo " Creating cluster-info configmap in kube-public namespace..."
sudo lxc exec "$bootstrap_cluster_name" -- k8s kubectl create configmap cluster-info -n kube-public --from-file=kubeconfig="/tmp/$bootstrap_cluster_name-cluster-info.yaml"

echo " Cleaning up temporary files..."
rm "$temp_kubeconfig"
sudo lxc exec "$bootstrap_cluster_name" -- rm "/tmp/$bootstrap_cluster_name-cluster-info.yaml"

echo "==> Setting up kubeconfig..."
sudo lxc exec "$bootstrap_cluster_name" -- mkdir -p /root/.kube
sudo lxc exec "$bootstrap_cluster_name" -- bash -c "k8s config > /root/.kube/config"

echo "==> Pulling kubeconfig from $bootstrap_cluster_name to ~/.kube/config..."
mkdir -p ~/.kube
sudo lxc file pull "$bootstrap_cluster_name/root/.kube/config" ~/.kube/config

echo "==> Setup complete! Bootstrap cluster '$bootstrap_cluster_name' is ready."




16 changes: 16 additions & 0 deletions hack/setup-lxd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/bash
# Initialize LXD for the e2e environment: clustered mode, HTTPS remote,
# and the k8s-integration profile required to run k8s-snap in containers.
# https://capn.linuxcontainers.org/tutorial/quick-start.html

# Abort on any failed command or unset variable; without this a partial
# LXD setup would silently continue and fail much later in the e2e run.
set -euo pipefail

# Primary host IP: the source address the kernel would use to reach 1.1.1.1.
ip_address="$(ip -o route get to 1.1.1.1 | sed -n 's/.*src \([0-9.]\+\).*/\1/p')"

sudo lxd init --auto --network-address "$ip_address"
sudo lxc network set lxdbr0 ipv6.address=none
sudo lxc cluster enable "$ip_address"

# Create a trust token, register an HTTPS remote for this host, and make it
# the default so subsequent `lxc` calls go through the HTTPS endpoint.
token="$(sudo lxc config trust add --name client | tail -1)"
sudo lxc remote add local-https --token "$token" "https://$(sudo lxc config get core.https_address)"
sudo lxc remote set-default local-https

# -O pins the output name: without it a re-run saves lxd-profile.yaml.1 and
# the stale original would be fed to `lxc profile create` below.
wget -O lxd-profile.yaml https://raw.githubusercontent.com/canonical/k8s-snap/refs/heads/main/tests/integration/lxd-profile.yaml

sudo lxc profile create k8s-integration < lxd-profile.yaml
32 changes: 29 additions & 3 deletions pkg/cloudinit/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,38 @@
## - /capi/etc/snap-local-path contains the path to the local snap file to be installed (e.g. /path/to/k8s.snap),
## or the path to a folder containing the local snap files to be installed (e.g. /path/to)

# Run a snap installation command, retrying on failure to ride out
# transient errors (store hiccups, seeding races). Fixed policy: up to
# 5 attempts, 3 seconds apart.
retry_snap_install() {
    local limit=5
    local wait_secs=3
    local try

    for (( try = 1; try <= limit; try++ )); do
        echo "Attempt $try of $limit to install snap..."
        if "$@"; then
            echo "Snap installation succeeded"
            return 0
        fi
        echo "Snap installation failed"
        if [ $try -lt $limit ]; then
            echo "Retrying in $wait_secs seconds..."
            sleep $wait_secs
        fi
    done

    echo "Failed to install snap after $limit attempts"
    return 1
}

if [ -f "/capi/etc/snap-channel" ]; then
snap_channel="$(cat /capi/etc/snap-channel)"
snap install k8s --classic --channel "${snap_channel}"
retry_snap_install snap install k8s --classic --channel "${snap_channel}"
elif [ -f "/capi/etc/snap-revision" ]; then
snap_revision="$(cat /capi/etc/snap-revision)"
snap install k8s --classic --revision "${snap_revision}"
retry_snap_install snap install k8s --classic --revision "${snap_revision}"
elif [ -f "/capi/etc/snap-local-path" ]; then
snap_local_path="$(cat /capi/etc/snap-local-path)"
snap_local_paths=( "${snap_local_path}" )
Expand All @@ -20,7 +46,7 @@ elif [ -f "/capi/etc/snap-local-path" ]; then
if [[ -d "${snap_local_path}" ]]; then
snap_local_paths=($(ls ${snap_local_path}/*.snap))
fi
snap install --classic --dangerous "${snap_local_paths[@]}"
retry_snap_install snap install --classic --dangerous "${snap_local_paths[@]}"
else
echo "No snap installation option found"
exit 1
Expand Down
6 changes: 3 additions & 3 deletions test/e2e/cluster_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
"k8s.io/utils/ptr"
"sigs.k8s.io/cluster-api/test/framework"
"sigs.k8s.io/cluster-api/test/framework/clusterctl"
"sigs.k8s.io/cluster-api/util"
Expand Down Expand Up @@ -129,6 +128,8 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp
// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)

createLXCSecretForIncus(ctx, input.BootstrapClusterProxy, input.E2EConfig, namespace.Name)

result = new(ApplyClusterTemplateAndWaitResult)

clusterctlLogFolder = filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName())
Expand Down Expand Up @@ -178,8 +179,8 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp
ControlPlane: result.ControlPlane,
MaxControlPlaneMachineCount: maxControlPlaneMachineCount,
KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-old", clusterName)),
WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
InfrastructureProviders: input.E2EConfig.InfrastructureProviders(),
})

By("Upgrading the machine deployment")
Expand All @@ -188,7 +189,6 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp
Cluster: result.Cluster,
UpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
MachineDeployments: result.MachineDeployments,
UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-new-0", clusterName)),
WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
})

Expand Down
2 changes: 1 addition & 1 deletion test/e2e/cluster_upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ var _ = Describe("Workload cluster upgrade with MaxSurge=0 [CK8s-Upgrade]", func
InfrastructureProvider: ptr.To(clusterctl.DefaultInfrastructureProvider),
ControlPlaneMachineCount: ptr.To[int64](3),
WorkerMachineCount: ptr.To[int64](1),
Flavor: ptr.To[string](flavorUpgradesMaxSurge0),
Flavor: ptr.To(flavorUpgradesMaxSurge0),
}
})
})
Expand Down
Loading
Loading