Skip to content

Commit 057c007

Browse files
authored
chore(v3): add upgrade reporting (#3046)
* chore(v3): add upgrade reporting * tests
1 parent 3f96aa9 commit 057c007

File tree

7 files changed

+277
-17
lines changed

7 files changed

+277
-17
lines changed

api/controllers/linux/upgrade/controller.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ type UpgradeController struct {
5353
stateMachine statemachine.Interface
5454
requiresInfraUpgrade bool
5555
logger logrus.FieldLogger
56+
targetVersion string
57+
initialVersion string
5658
// App controller composition
5759
*appcontroller.AppController
5860
}
@@ -167,6 +169,18 @@ func WithEndUserConfig(endUserConfig *ecv1beta1.Config) UpgradeControllerOption
167169
}
168170
}
169171

172+
func WithTargetVersion(targetVersion string) UpgradeControllerOption {
173+
return func(c *UpgradeController) {
174+
c.targetVersion = targetVersion
175+
}
176+
}
177+
178+
func WithInitialVersion(initialVersion string) UpgradeControllerOption {
179+
return func(c *UpgradeController) {
180+
c.initialVersion = initialVersion
181+
}
182+
}
183+
170184
func NewUpgradeController(opts ...UpgradeControllerOption) (*UpgradeController, error) {
171185
controller := &UpgradeController{
172186
store: store.NewMemoryStore(),
@@ -252,6 +266,8 @@ func NewUpgradeController(opts ...UpgradeControllerOption) (*UpgradeController,
252266
controller.AppController = appController
253267
}
254268

269+
controller.registerReportingHandlers()
270+
255271
return controller, nil
256272
}
257273

api/controllers/linux/upgrade/controller_test.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/replicatedhq/embedded-cluster/api/internal/store"
1515
"github.com/replicatedhq/embedded-cluster/api/types"
1616
ecv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
17+
"github.com/replicatedhq/embedded-cluster/pkg/metrics"
1718
"github.com/replicatedhq/embedded-cluster/pkg/release"
1819
"github.com/replicatedhq/embedded-cluster/pkg/runtimeconfig"
1920
kotsv1beta1 "github.com/replicatedhq/kotskinds/apis/kots/v1beta1"
@@ -271,6 +272,171 @@ func TestGetInfra(t *testing.T) {
271272
}
272273
}
273274

275+
func TestReportingHandlers(t *testing.T) {
276+
tests := []struct {
277+
name string
278+
currentState statemachine.State
279+
targetState statemachine.State
280+
requiresInfraUpgrade bool
281+
targetVersion string
282+
initialVersion string
283+
setupMocks func(*metrics.MockReporter, *store.MockStore)
284+
}{
285+
{
286+
name: "report upgrade succeeded",
287+
currentState: states.StateAppUpgrading,
288+
targetState: states.StateSucceeded,
289+
requiresInfraUpgrade: false,
290+
targetVersion: "1.0.0",
291+
initialVersion: "0.9.0",
292+
setupMocks: func(mr *metrics.MockReporter, st *store.MockStore) {
293+
mr.On("ReportUpgradeSucceeded", mock.Anything, "1.0.0", "0.9.0")
294+
},
295+
},
296+
{
297+
name: "report infrastructure upgrade failed",
298+
currentState: states.StateInfrastructureUpgrading,
299+
targetState: states.StateInfrastructureUpgradeFailed,
300+
requiresInfraUpgrade: true,
301+
targetVersion: "1.0.0",
302+
initialVersion: "0.9.0",
303+
setupMocks: func(mr *metrics.MockReporter, st *store.MockStore) {
304+
st.LinuxInfraMockStore.On("GetStatus").Return(types.Status{
305+
State: types.StateFailed,
306+
Description: "infrastructure upgrade failed",
307+
}, nil)
308+
mr.On("ReportUpgradeFailed", mock.Anything, mock.MatchedBy(func(err error) bool {
309+
return err.Error() == "infrastructure upgrade failed"
310+
}), "1.0.0", "0.9.0")
311+
},
312+
},
313+
{
314+
name: "report app upgrade failed",
315+
currentState: states.StateAppUpgrading,
316+
targetState: states.StateAppUpgradeFailed,
317+
requiresInfraUpgrade: false,
318+
targetVersion: "1.0.0",
319+
initialVersion: "0.9.0",
320+
setupMocks: func(mr *metrics.MockReporter, st *store.MockStore) {
321+
st.AppUpgradeMockStore.On("GetStatus").Return(types.Status{
322+
State: types.StateFailed,
323+
Description: "app upgrade failed",
324+
}, nil)
325+
mr.On("ReportUpgradeFailed", mock.Anything, mock.MatchedBy(func(err error) bool {
326+
return err.Error() == "app upgrade failed"
327+
}), "1.0.0", "0.9.0")
328+
},
329+
},
330+
{
331+
name: "report app preflights succeeded",
332+
currentState: states.StateAppPreflightsRunning,
333+
targetState: states.StateAppPreflightsSucceeded,
334+
requiresInfraUpgrade: false,
335+
setupMocks: func(mr *metrics.MockReporter, st *store.MockStore) {
336+
mr.On("ReportAppPreflightsSucceeded", mock.Anything)
337+
},
338+
},
339+
{
340+
name: "report app preflights failed",
341+
currentState: states.StateAppPreflightsRunning,
342+
targetState: states.StateAppPreflightsFailed,
343+
requiresInfraUpgrade: false,
344+
setupMocks: func(mr *metrics.MockReporter, st *store.MockStore) {
345+
output := &types.PreflightsOutput{
346+
Fail: []types.PreflightsRecord{
347+
{
348+
Title: "Test Check",
349+
Message: "Test check failed",
350+
Strict: true,
351+
},
352+
},
353+
}
354+
st.AppPreflightMockStore.On("GetOutput").Return(output, nil)
355+
mr.On("ReportAppPreflightsFailed", mock.Anything, output)
356+
},
357+
},
358+
{
359+
name: "report app preflights bypassed",
360+
currentState: states.StateAppPreflightsFailed,
361+
targetState: states.StateAppPreflightsFailedBypassed,
362+
requiresInfraUpgrade: false,
363+
setupMocks: func(mr *metrics.MockReporter, st *store.MockStore) {
364+
output := &types.PreflightsOutput{
365+
Fail: []types.PreflightsRecord{
366+
{
367+
Title: "Non-strict Check",
368+
Message: "Test check failed but can be bypassed",
369+
Strict: false,
370+
},
371+
},
372+
}
373+
st.AppPreflightMockStore.On("GetOutput").Return(output, nil)
374+
mr.On("ReportAppPreflightsBypassed", mock.Anything, output)
375+
},
376+
},
377+
}
378+
379+
for _, tt := range tests {
380+
t.Run(tt.name, func(t *testing.T) {
381+
mockMetricsReporter := &metrics.MockReporter{}
382+
mockStore := &store.MockStore{}
383+
mockInfraManager := &infra.MockInfraManager{}
384+
385+
tt.setupMocks(mockMetricsReporter, mockStore)
386+
387+
// Mock RequiresUpgrade which is called during controller initialization
388+
mockInfraManager.On("RequiresUpgrade", mock.Anything, mock.Anything).Return(tt.requiresInfraUpgrade, nil)
389+
390+
// Create state machine starting in the current state
391+
sm := NewStateMachine(
392+
WithCurrentState(tt.currentState),
393+
WithRequiresInfraUpgrade(tt.requiresInfraUpgrade),
394+
)
395+
396+
// Create app controller (required for upgrade controller)
397+
appController, err := appcontroller.NewAppController(
398+
appcontroller.WithStateMachine(sm),
399+
appcontroller.WithStore(mockStore),
400+
appcontroller.WithReleaseData(getTestReleaseData(&kotsv1beta1.Config{})),
401+
)
402+
require.NoError(t, err)
403+
404+
// Create upgrade controller with metrics reporter
405+
controller, err := NewUpgradeController(
406+
WithStateMachine(sm),
407+
WithAppController(appController),
408+
WithMetricsReporter(mockMetricsReporter),
409+
WithStore(mockStore),
410+
WithInfraManager(mockInfraManager),
411+
WithTargetVersion(tt.targetVersion),
412+
WithInitialVersion(tt.initialVersion),
413+
WithReleaseData(getTestReleaseData(&kotsv1beta1.Config{})),
414+
)
415+
require.NoError(t, err)
416+
417+
// Trigger the state transition
418+
lock, err := sm.AcquireLock()
419+
require.NoError(t, err)
420+
defer lock.Release()
421+
422+
err = sm.Transition(lock, tt.targetState)
423+
require.NoError(t, err)
424+
425+
// Wait for the event handler goroutine to complete
426+
time.Sleep(1 * time.Second)
427+
428+
// Verify that the metrics reporter was called as expected
429+
mockMetricsReporter.AssertExpectations(t)
430+
mockStore.LinuxInfraMockStore.AssertExpectations(t)
431+
mockStore.AppUpgradeMockStore.AssertExpectations(t)
432+
mockStore.AppPreflightMockStore.AssertExpectations(t)
433+
434+
// Avoid unused variable error
435+
_ = controller
436+
})
437+
}
438+
}
439+
274440
func getTestReleaseData(appConfig *kotsv1beta1.Config) *release.ReleaseData {
275441
return &release.ReleaseData{
276442
EmbeddedClusterConfig: &ecv1beta1.Config{},
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package upgrade
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
8+
"github.com/replicatedhq/embedded-cluster/api/internal/statemachine"
9+
"github.com/replicatedhq/embedded-cluster/api/internal/states"
10+
"github.com/replicatedhq/embedded-cluster/api/types"
11+
)
12+
13+
func (c *UpgradeController) registerReportingHandlers() {
14+
c.stateMachine.RegisterEventHandler(states.StateInfrastructureUpgradeFailed, c.reportUpgradeFailed)
15+
c.stateMachine.RegisterEventHandler(states.StateAppUpgradeFailed, c.reportUpgradeFailed)
16+
c.stateMachine.RegisterEventHandler(states.StateSucceeded, c.reportUpgradeSucceeded)
17+
18+
// report preflight failures and bypassed
19+
c.stateMachine.RegisterEventHandler(states.StateAppPreflightsFailed, c.reportAppPreflightsFailed)
20+
c.stateMachine.RegisterEventHandler(states.StateAppPreflightsFailedBypassed, c.reportAppPreflightsBypassed)
21+
c.stateMachine.RegisterEventHandler(states.StateAppPreflightsSucceeded, c.reportAppPreflightsSucceeded)
22+
}
23+
24+
func (c *UpgradeController) reportUpgradeFailed(ctx context.Context, _, toState statemachine.State) {
25+
var status types.Status
26+
var err error
27+
28+
switch toState {
29+
case states.StateInfrastructureUpgradeFailed:
30+
status, err = c.store.LinuxInfraStore().GetStatus()
31+
if err != nil {
32+
err = fmt.Errorf("get status from infra store: %w", err)
33+
}
34+
case states.StateAppUpgradeFailed:
35+
status, err = c.store.AppUpgradeStore().GetStatus()
36+
if err != nil {
37+
err = fmt.Errorf("get status from app upgrade store: %w", err)
38+
}
39+
}
40+
if err != nil {
41+
c.logger.WithError(err).Error("failed to report failed upgrade")
42+
return
43+
}
44+
45+
c.logger.Info("Reporting metrics event upgrade failed")
46+
c.metricsReporter.ReportUpgradeFailed(ctx, errors.New(status.Description), c.targetVersion, c.initialVersion)
47+
}
48+
49+
func (c *UpgradeController) reportUpgradeSucceeded(ctx context.Context, _, _ statemachine.State) {
50+
c.logger.Info("Reporting metrics event upgrade succeeded")
51+
c.metricsReporter.ReportUpgradeSucceeded(ctx, c.targetVersion, c.initialVersion)
52+
}
53+
54+
func (c *UpgradeController) reportAppPreflightsFailed(ctx context.Context, _, _ statemachine.State) {
55+
output, err := c.store.AppPreflightStore().GetOutput()
56+
if err != nil {
57+
err = fmt.Errorf("get output from app preflight store: %w", err)
58+
c.logger.WithError(err).Error("failed to report app preflights failed")
59+
return
60+
}
61+
c.logger.Info("Reporting metrics event app preflights failed")
62+
c.metricsReporter.ReportAppPreflightsFailed(ctx, output)
63+
}
64+
65+
func (c *UpgradeController) reportAppPreflightsBypassed(ctx context.Context, _, _ statemachine.State) {
66+
output, err := c.store.AppPreflightStore().GetOutput()
67+
if err != nil {
68+
err = fmt.Errorf("get output from app preflight store: %w", err)
69+
c.logger.WithError(err).Error("failed to report app preflights bypassed")
70+
return
71+
}
72+
c.logger.Info("Reporting metrics event app preflights bypassed")
73+
c.metricsReporter.ReportAppPreflightsBypassed(ctx, output)
74+
}
75+
76+
func (c *UpgradeController) reportAppPreflightsSucceeded(ctx context.Context, _, _ statemachine.State) {
77+
c.logger.Info("Reporting metrics event app preflights succeeded")
78+
c.metricsReporter.ReportAppPreflightsSucceeded(ctx)
79+
}

api/controllers/linux/upgrade/statemachine.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ var baseStateTransitions = map[statemachine.State][]statemachine.State{
2828

2929
// Infrastructure-specific state transitions
3030
var infraStateTransitions = map[statemachine.State][]statemachine.State{
31-
states.StateInfrastructureUpgrading: {states.StateInfrastructureUpgraded, states.StateInfrastructureUpgradeFailed},
32-
states.StateInfrastructureUpgraded: {states.StateAppPreflightsRunning},
31+
states.StateInfrastructureUpgrading: {states.StateInfrastructureUpgraded, states.StateInfrastructureUpgradeFailed},
32+
states.StateInfrastructureUpgraded: {states.StateAppPreflightsRunning},
33+
// final states
3334
states.StateInfrastructureUpgradeFailed: {},
3435
}
3536

api/internal/handlers/linux/linux.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ func New(cfg types.APIConfig, opts ...Option) (*Handler, error) {
127127
upgrade.WithConfigValues(h.cfg.ConfigValues),
128128
upgrade.WithEndUserConfig(h.cfg.EndUserConfig),
129129
upgrade.WithClusterID(h.cfg.ClusterID),
130+
upgrade.WithTargetVersion(h.cfg.TargetVersion),
131+
upgrade.WithInitialVersion(h.cfg.InitialVersion),
130132
)
131133
if err != nil {
132134
return nil, fmt.Errorf("new upgrade controller: %w", err)

api/types/api.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ type APIConfig struct {
2424
ClusterID string
2525
Target Target
2626
Mode Mode
27+
TargetVersion string // Used for upgrade metrics reporting
28+
InitialVersion string // Used for upgrade metrics reporting
2729

2830
LinuxConfig
2931
KubernetesConfig

cmd/installer/cli/upgrade.go

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"strings"
1111
"syscall"
1212

13-
"github.com/AlecAivazis/survey/v2/terminal"
1413
apitypes "github.com/replicatedhq/embedded-cluster/api/types"
1514
"github.com/replicatedhq/embedded-cluster/cmd/installer/goods"
1615
"github.com/replicatedhq/embedded-cluster/cmd/installer/kotscli"
@@ -129,21 +128,14 @@ func UpgradeCmd(ctx context.Context, appSlug, appTitle string) *cobra.Command {
129128
)
130129
metricsReporter.ReportUpgradeStarted(ctx)
131130

132-
// Setup signal handler with the metrics reporter cleanup function
133-
signalHandler(ctx, cancel, func(ctx context.Context, sig os.Signal) {
134-
metricsReporter.ReportSignalAborted(ctx, sig)
135-
})
136-
137-
if err := runManagerExperienceUpgrade(ctx, flags, upgradeConfig, existingRC, metricsReporter.reporter, appTitle); err != nil {
138-
// Check if this is an interrupt error from the terminal
139-
if errors.Is(err, terminal.InterruptErr) {
140-
metricsReporter.ReportSignalAborted(ctx, syscall.SIGINT)
141-
} else {
142-
metricsReporter.ReportUpgradeFailed(ctx, err)
143-
}
131+
// Run the manager experience upgrade - the upgrade controller will handle
132+
// reporting success/failure events through its event handlers
133+
if err := runManagerExperienceUpgrade(
134+
ctx, flags, upgradeConfig, existingRC, metricsReporter.reporter, appTitle,
135+
targetVersion, initialVersion,
136+
); err != nil {
144137
return err
145138
}
146-
metricsReporter.ReportUpgradeSucceeded(ctx)
147139

148140
return nil
149141
},
@@ -433,7 +425,7 @@ func readPasswordHash(ctx context.Context, kcli client.Client) ([]byte, error) {
433425

434426
func runManagerExperienceUpgrade(
435427
ctx context.Context, flags UpgradeCmdFlags, upgradeConfig upgradeConfig, rc runtimeconfig.RuntimeConfig,
436-
metricsReporter metrics.ReporterInterface, appTitle string,
428+
metricsReporter metrics.ReporterInterface, appTitle string, targetVersion string, initialVersion string,
437429
) (finalErr error) {
438430
apiConfig := apiOptions{
439431
APIConfig: apitypes.APIConfig{
@@ -450,6 +442,8 @@ func runManagerExperienceUpgrade(
450442
ClusterID: upgradeConfig.clusterID,
451443
Target: apitypes.Target(flags.target),
452444
Mode: apitypes.ModeUpgrade,
445+
TargetVersion: targetVersion,
446+
InitialVersion: initialVersion,
453447

454448
LinuxConfig: apitypes.LinuxConfig{
455449
RuntimeConfig: rc,

0 commit comments

Comments
 (0)