Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
290 changes: 290 additions & 0 deletions pkg-new/upgrade/distribute_artifacts_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
package upgrade

import (
"context"
"testing"

apv1b2 "github.com/k0sproject/k0s/pkg/apis/autopilot/v1beta2"
"github.com/k0sproject/k0s/pkg/autopilot/controller/plans/core"
ecv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
ectypes "github.com/replicatedhq/embedded-cluster/kinds/types"
"github.com/replicatedhq/embedded-cluster/operator/pkg/artifacts"
"github.com/replicatedhq/embedded-cluster/operator/pkg/release"
"github.com/replicatedhq/embedded-cluster/pkg/runtimeconfig"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func createTestNodes() []corev1.Node {
return []corev1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
Status: corev1.NodeStatus{
Conditions: []corev1.NodeCondition{
{
Type: corev1.NodeReady,
Status: corev1.ConditionTrue,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "node2",
},
Status: corev1.NodeStatus{
Conditions: []corev1.NodeCondition{
{
Type: corev1.NodeReady,
Status: corev1.ConditionTrue,
},
},
},
},
}
}

func createFailedAutopilotPlan() *apv1b2.Plan {
return &apv1b2.Plan{
ObjectMeta: metav1.ObjectMeta{
Name: "autopilot",
Annotations: map[string]string{
artifacts.InstallationNameAnnotation: "test-installation",
},
},
Status: apv1b2.PlanStatus{
State: core.PlanApplyFailed,
},
}
}

func TestDistributeArtifacts_SuccessfulOnline(t *testing.T) {
ctx := context.Background()
logger := logrus.New()

// Set up fast polling for tests
t.Setenv("AUTOPILOT_POLL_INTERVAL", "100ms")
t.Setenv("AUTOPILOT_MAX_STEPS", "10")
t.Setenv("AUTOPILOT_NODE_POLL_INTERVAL", "100ms")
t.Setenv("AUTOPILOT_NODE_MAX_STEPS", "10")

// Create fake client with necessary schemes
scheme := runtime.NewScheme()
require.NoError(t, ecv1beta1.AddToScheme(scheme))
require.NoError(t, batchv1.AddToScheme(scheme))
require.NoError(t, corev1.AddToScheme(scheme))
require.NoError(t, apv1b2.AddToScheme(scheme))

Check failure on line 85 in pkg-new/upgrade/distribute_artifacts_test.go

View workflow job for this annotation

GitHub Actions / Sanitize

SA1019: apv1b2.AddToScheme is deprecated: use Install instead (staticcheck)

// Create test nodes
nodes := createTestNodes()
initialObjects := make([]client.Object, len(nodes))
for i, node := range nodes {
initialObjects[i] = &node
}

baseCli := fake.NewClientBuilder().
WithScheme(scheme).
WithStatusSubresource(&ecv1beta1.Installation{}).
WithObjects(initialObjects...).
Build()

// Wrap with mock client that simulates successful job completion
mockCli := &mockClientWithStateChange{
Client: baseCli,
callCount: 0,
callsUntil: 1, // Change state after first call
finalJobStatus: batchv1.JobStatus{Succeeded: 1}, // Mark jobs as succeeded
// No autopilot plan needed for online mode
}

// Create upgrader
upgrader := NewInfraUpgrader(
WithKubeClient(mockCli),
WithRuntimeConfig(runtimeconfig.New(&ecv1beta1.RuntimeConfigSpec{})),
WithLogger(logger),
)

// Create test installation
installation := createTestInstallation()
installation.Spec.AirGap = false

// Execute the function
err := upgrader.DistributeArtifacts(ctx, installation, "test-mirror:latest", "test-license", "test-app", "test-channel", "1.0.0")

// The function should succeed as our mock client simulates successful job completion
require.NoError(t, err)
}

func TestDistributeArtifacts_SuccessfulAirgap(t *testing.T) {
ctx := context.Background()
logger := logrus.New()

// Set up fast polling for tests
t.Setenv("AUTOPILOT_POLL_INTERVAL", "100ms")
t.Setenv("AUTOPILOT_MAX_STEPS", "10")
t.Setenv("AUTOPILOT_NODE_POLL_INTERVAL", "100ms")
t.Setenv("AUTOPILOT_NODE_MAX_STEPS", "10")

// Cache release metadata for the test
meta := &ectypes.ReleaseMetadata{
Versions: map[string]string{
"Kubernetes": "v1.30.14+k0s.0",
},
}
release.CacheMeta("1.30.14+k0s.0", *meta)

// Create fake client with necessary schemes
scheme := runtime.NewScheme()
require.NoError(t, ecv1beta1.AddToScheme(scheme))
require.NoError(t, batchv1.AddToScheme(scheme))
require.NoError(t, corev1.AddToScheme(scheme))
require.NoError(t, apv1b2.AddToScheme(scheme))

Check failure on line 150 in pkg-new/upgrade/distribute_artifacts_test.go

View workflow job for this annotation

GitHub Actions / Sanitize

SA1019: apv1b2.AddToScheme is deprecated: use Install instead (staticcheck)

// Create test nodes
nodes := createTestNodes()
initialObjects := make([]client.Object, len(nodes))
for i, node := range nodes {
initialObjects[i] = &node
}

// Add registry secret for airgap
registrySecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "registry-creds",
Namespace: "kotsadm",
},
Data: map[string][]byte{
"username": []byte("test-user"),
"password": []byte("test-password"),
},
}
initialObjects = append(initialObjects, registrySecret)

baseCli := fake.NewClientBuilder().
WithScheme(scheme).
WithStatusSubresource(&ecv1beta1.Installation{}).
WithObjects(initialObjects...).
Build()

// Wrap with mock client that simulates successful job completion and autopilot plan success
mockCli := &mockClientWithStateChange{
Client: baseCli,
callCount: 0,
callsUntil: 1, // Change state after first call
finalJobStatus: batchv1.JobStatus{Succeeded: 1}, // Mark jobs as succeeded
finalPlanState: core.PlanCompleted,
}

// Create upgrader
upgrader := NewInfraUpgrader(
WithKubeClient(mockCli),
WithRuntimeConfig(runtimeconfig.New(&ecv1beta1.RuntimeConfigSpec{})),
WithLogger(logger),
)

// Create test installation
installation := createTestInstallation()
installation.Spec.AirGap = true
installation.Spec.Artifacts = &ecv1beta1.ArtifactsLocation{
Images: "test-images-url",
HelmCharts: "test-helmcharts-url",
EmbeddedClusterBinary: "test-binary-url",
EmbeddedClusterMetadata: "test-metadata-url",
}

// Execute the function
err := upgrader.DistributeArtifacts(ctx, installation, "test-mirror:latest", "test-license", "test-app", "test-channel", "1.0.0")

// The function should succeed as our mock client simulates successful job completion and autopilot plan success
require.NoError(t, err)
}

func TestDistributeArtifacts_AirgapAutopilotPlanFailure(t *testing.T) {
ctx := context.Background()
logger := logrus.New()

// Set up fast polling for tests
t.Setenv("AUTOPILOT_POLL_INTERVAL", "100ms")
t.Setenv("AUTOPILOT_MAX_STEPS", "10")
t.Setenv("AUTOPILOT_NODE_POLL_INTERVAL", "100ms")
t.Setenv("AUTOPILOT_NODE_MAX_STEPS", "10")

// Create fake client with necessary schemes
scheme := runtime.NewScheme()
require.NoError(t, ecv1beta1.AddToScheme(scheme))
require.NoError(t, batchv1.AddToScheme(scheme))
require.NoError(t, corev1.AddToScheme(scheme))
require.NoError(t, apv1b2.AddToScheme(scheme))

Check failure on line 226 in pkg-new/upgrade/distribute_artifacts_test.go

View workflow job for this annotation

GitHub Actions / Sanitize

SA1019: apv1b2.AddToScheme is deprecated: use Install instead (staticcheck)

// Create test nodes
nodes := createTestNodes()
initialObjects := make([]client.Object, len(nodes))
for i, node := range nodes {
initialObjects[i] = &node
}

// Add registry secret for airgap
registrySecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "registry-creds",
Namespace: "kotsadm",
},
Data: map[string][]byte{
"username": []byte("test-user"),
"password": []byte("test-password"),
},
}
initialObjects = append(initialObjects, registrySecret)

// Add failed autopilot plan
autopilotPlan := createFailedAutopilotPlan()
initialObjects = append(initialObjects, autopilotPlan)

baseCli := fake.NewClientBuilder().
WithScheme(scheme).
WithStatusSubresource(&ecv1beta1.Installation{}).
WithObjects(initialObjects...).
Build()

// Wrap with mock client that simulates successful job completion but failed autopilot plan
mockCli := &mockClientWithStateChange{
Client: baseCli,
callCount: 0,
callsUntil: 1, // Change state after first call
finalJobStatus: batchv1.JobStatus{Succeeded: 1}, // Mark jobs as succeeded
finalPlanState: core.PlanApplyFailed, // Autopilot plan fails
}

// Create upgrader
upgrader := NewInfraUpgrader(
WithKubeClient(mockCli),
WithRuntimeConfig(runtimeconfig.New(&ecv1beta1.RuntimeConfigSpec{})),
WithLogger(logger),
)

// Create test installation
installation := createTestInstallation()
installation.Spec.AirGap = true
installation.Spec.Artifacts = &ecv1beta1.ArtifactsLocation{
Images: "test-images-url",
HelmCharts: "test-helmcharts-url",
EmbeddedClusterBinary: "test-binary-url",
EmbeddedClusterMetadata: "test-metadata-url",
}

// Execute the function
err := upgrader.DistributeArtifacts(ctx, installation, "test-mirror:latest", "test-license", "test-app", "test-channel", "1.0.0")

// Verify error due to failed autopilot plan
require.Error(t, err)
assert.Contains(t, err.Error(), "autopilot plan failed")
}
49 changes: 40 additions & 9 deletions pkg-new/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import (
"context"
"errors"
"fmt"
"os"
"reflect"
"strconv"
"time"

apv1b2 "github.com/k0sproject/k0s/pkg/apis/autopilot/v1beta2"
Expand Down Expand Up @@ -362,9 +364,25 @@ func createAutopilotPlan(ctx context.Context, cli client.Client, rc runtimeconfi
}

func waitForAutopilotPlan(ctx context.Context, cli client.Client, logger logrus.FieldLogger) (apv1b2.Plan, error) {
// Get poll interval from environment variable, default to 20 seconds
pollInterval := 20 * time.Second
if envInterval := os.Getenv("AUTOPILOT_POLL_INTERVAL"); envInterval != "" {
if duration, err := time.ParseDuration(envInterval); err == nil {
pollInterval = duration
}
}

// Get max steps from environment variable, default to 90 attempts × 20s = 1800s = 30 minutes
maxSteps := 90
if envSteps := os.Getenv("AUTOPILOT_MAX_STEPS"); envSteps != "" {
if steps, err := strconv.Atoi(envSteps); err == nil && steps > 0 {
maxSteps = steps
}
}

backoff := wait.Backoff{
Duration: 20 * time.Second, // 20-second polling interval
Steps: 90, // 90 attempts × 20s = 1800s = 30 minutes
Duration: pollInterval,
Steps: maxSteps,
}

var plan apv1b2.Plan
Expand Down Expand Up @@ -402,15 +420,28 @@ func waitForAutopilotPlan(ctx context.Context, cli client.Client, logger logrus.
}

func waitForClusterNodesMatchVersion(ctx context.Context, cli client.Client, desiredVersion string, logger logrus.FieldLogger) error {
return waitForClusterNodesMatchVersionWithBackoff(ctx, cli, desiredVersion, logger, wait.Backoff{
Duration: 5 * time.Second,
Steps: 60, // 60 attempts × 5s = 300s = 5 minutes
// Get poll interval from environment variable, default to 5 seconds
pollInterval := 5 * time.Second
if envInterval := os.Getenv("AUTOPILOT_NODE_POLL_INTERVAL"); envInterval != "" {
if duration, err := time.ParseDuration(envInterval); err == nil {
pollInterval = duration
}
}

// Get max steps from environment variable, default to 60 attempts × 5s = 300s = 5 minutes
maxSteps := 60
if envSteps := os.Getenv("AUTOPILOT_NODE_MAX_STEPS"); envSteps != "" {
if steps, err := strconv.Atoi(envSteps); err == nil && steps > 0 {
maxSteps = steps
}
}

backoff := wait.Backoff{
Duration: pollInterval,
Steps: maxSteps,
Factor: 1.0,
Jitter: 0.1,
})
}

func waitForClusterNodesMatchVersionWithBackoff(ctx context.Context, cli client.Client, desiredVersion string, logger logrus.FieldLogger, backoff wait.Backoff) error {
}
var lastErr error

err := wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (bool, error) {
Expand Down
Loading
Loading