Skip to content

Commit 6873b56

Browse files
committed
Add formal checks to manual rollback arguments
1 parent a7db767 commit 6873b56

File tree

2 files changed

+195
-42
lines changed

2 files changed

+195
-42
lines changed

internal/pkg/agent/application/upgrade/upgrade.go

Lines changed: 75 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,13 @@ var agentArtifact = artifact.Artifact{
5757
}
5858

5959
var (
60-
ErrWatcherNotStarted = errors.New("watcher did not start in time")
61-
ErrUpgradeSameVersion = errors.New("upgrade did not occur because it is the same version")
62-
ErrNonFipsToFips = errors.New("cannot switch to fips mode when upgrading")
63-
ErrFipsToNonFips = errors.New("cannot switch to non-fips mode when upgrading")
64-
ErrNilUpdateMarker = errors.New("loaded a nil update marker")
60+
ErrWatcherNotStarted = errors.New("watcher did not start in time")
61+
ErrUpgradeSameVersion = errors.New("upgrade did not occur because it is the same version")
62+
ErrNonFipsToFips = errors.New("cannot switch to fips mode when upgrading")
63+
ErrFipsToNonFips = errors.New("cannot switch to non-fips mode when upgrading")
64+
ErrNilUpdateMarker = errors.New("loaded a nil update marker")
65+
ErrEmptyRollbackVersion = errors.New("rollback version is empty")
66+
ErrNoRollbacksAvailable = errors.New("no rollbacks available")
6567
)
6668

6769
func init() {
@@ -222,7 +224,7 @@ func checkUpgrade(log *logger.Logger, currentVersion, newVersion agentVersion, m
222224
func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) {
223225

224226
if rollback {
225-
return u.forceRollbackToPreviousVersion(ctx, paths.Top(), version, action, det)
227+
return u.forceRollbackToPreviousVersion(ctx, paths.Top(), time.Now(), version, action)
226228
}
227229

228230
u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI)
@@ -406,7 +408,11 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, rollback bool, s
406408
return cb, nil
407409
}
408410

409-
func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir string, version string, action *fleetapi.ActionUpgrade, d *details.Details) (reexec.ShutdownCallbackFn, error) {
411+
func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir string, now time.Time, version string, action *fleetapi.ActionUpgrade) (reexec.ShutdownCallbackFn, error) {
412+
if version == "" {
413+
return nil, ErrEmptyRollbackVersion
414+
}
415+
410416
// check that the upgrade marker exists and is accessible
411417
updateMarkerPath := markerFilePath(paths.DataFrom(topDir))
412418
_, err := os.Stat(updateMarkerPath)
@@ -419,18 +425,53 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir st
419425
// 2. there has been at least the first restart with the new agent (i.e. we are not still downloading/extracting/rotating)
420426
// 3. upgrade marker exists
421427
// these should be revalidated after taking over watcher
422-
updateMarker, err := u.persistManualRollback(ctx, topDir)
423-
if err != nil {
424-
return nil, fmt.Errorf("persisting rollback in update marker: %w", err)
425-
}
426428

427-
previous, current, err := extractAgentInstallsFromMarker(updateMarker)
429+
watcherExecutable := ""
430+
err = withTakeOverWatcher(ctx, u.log, topDir, u.watcherHelper, func() error {
431+
// read the upgrade marker
432+
updateMarker, err := LoadMarker(paths.DataFrom(topDir))
433+
if err != nil {
434+
return fmt.Errorf("loading marker: %w", err)
435+
}
436+
437+
if updateMarker == nil {
438+
return ErrNilUpdateMarker
439+
}
440+
441+
if updateMarker.Details == nil || len(updateMarker.Details.Metadata.RollbacksAvailable) == 0 {
442+
return ErrNoRollbacksAvailable
443+
}
444+
var selectedRollback *details.RollbackAvailable
445+
for _, rollback := range updateMarker.Details.Metadata.RollbacksAvailable {
446+
if rollback.Version == version && now.Before(rollback.ValidUntil) {
447+
selectedRollback = &rollback
448+
break
449+
}
450+
}
451+
if selectedRollback == nil {
452+
return fmt.Errorf("version %q not listed among the available rollbacks: %w", version, ErrNoRollbacksAvailable)
453+
}
454+
455+
// write the desired outcome of the upgrade
456+
err = u.persistManualRollback(topDir, updateMarker)
457+
if err != nil {
458+
return fmt.Errorf("persisting rollback in update marker: %w", err)
459+
}
460+
461+
// extract the agent installs involved in the upgrade and select the most appropriate watcher executable
462+
previous, current, err := extractAgentInstallsFromMarker(updateMarker)
463+
if err != nil {
464+
return fmt.Errorf("extracting current and previous install details: %w", err)
465+
}
466+
watcherExecutable = u.watcherHelper.SelectWatcherExecutable(topDir, previous, current)
467+
return nil
468+
})
469+
428470
if err != nil {
429-
return nil, fmt.Errorf("extracting current and previous install details: %w", err)
471+
return nil, err
430472
}
431473

432-
// Invoke watcher again
433-
watcherExecutable := u.watcherHelper.SelectWatcherExecutable(topDir, previous, current)
474+
// Invoke watcher again (now that we released the watcher applocks)
434475
_, err = u.watcherHelper.InvokeWatcher(u.log, watcherExecutable)
435476
if err != nil {
436477
return nil, fmt.Errorf("invoking watcher: %w", err)
@@ -440,6 +481,21 @@ func (u *Upgrader) forceRollbackToPreviousVersion(ctx context.Context, topDir st
440481

441482
}
442483

484+
func withTakeOverWatcher(ctx context.Context, log *logger.Logger, topDir string, watcherHelper WatcherHelper, f func() error) error {
485+
watcherApplock, err := watcherHelper.TakeOverWatcher(ctx, log, topDir)
486+
if err != nil {
487+
return fmt.Errorf("taking over watcher processes: %w", err)
488+
}
489+
defer func(watcherApplock *filelock.AppLocker) {
490+
releaseWatcherAppLockerErr := watcherApplock.Unlock()
491+
if releaseWatcherAppLockerErr != nil {
492+
log.Warnw("error releasing watcher applock", "error", releaseWatcherAppLockerErr)
493+
}
494+
}(watcherApplock)
495+
496+
return f()
497+
}
498+
443499
func extractAgentInstallsFromMarker(updateMarker *UpdateMarker) (previous agentInstall, current agentInstall, err error) {
444500
previousParsedVersion, err := agtversion.ParseVersion(updateMarker.PrevVersion)
445501
if err != nil {
@@ -466,35 +522,14 @@ func extractAgentInstallsFromMarker(updateMarker *UpdateMarker) (previous agentI
466522
return previous, current, nil
467523
}
468524

469-
func (u *Upgrader) persistManualRollback(ctx context.Context, topDir string) (*UpdateMarker, error) {
470-
watcherApplock, err := u.watcherHelper.TakeOverWatcher(ctx, u.log, topDir)
471-
if err != nil {
472-
return nil, fmt.Errorf("taking over watcher processes: %w", err)
473-
}
474-
defer func(watcherApplock *filelock.AppLocker) {
475-
releaseWatcherAppLockerErr := watcherApplock.Unlock()
476-
if releaseWatcherAppLockerErr != nil {
477-
u.log.Warnw("error releasing watcher applock", "error", releaseWatcherAppLockerErr)
478-
}
479-
}(watcherApplock)
480-
481-
// read the upgrade marker
482-
updateMarker, err := LoadMarker(paths.DataFrom(topDir))
483-
if err != nil {
484-
return nil, fmt.Errorf("loading marker: %w", err)
485-
}
486-
487-
if updateMarker == nil {
488-
return nil, ErrNilUpdateMarker
489-
}
490-
525+
func (u *Upgrader) persistManualRollback(topDir string, updateMarker *UpdateMarker) error {
491526
updateMarker.DesiredOutcome = OUTCOME_ROLLBACK
492-
err = SaveMarker(paths.DataFrom(topDir), updateMarker, true)
527+
err := SaveMarker(paths.DataFrom(topDir), updateMarker, true)
493528
if err != nil {
494-
return updateMarker, fmt.Errorf("saving marker: %w", err)
529+
return fmt.Errorf("saving marker: %w", err)
495530
}
496531

497-
return updateMarker, nil
532+
return nil
498533
}
499534

500535
// Ack acks last upgrade action

internal/pkg/agent/application/upgrade/upgrade_test.go

Lines changed: 120 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,23 @@ func TestIsSameReleaseVersion(t *testing.T) {
10381038
}
10391039

10401040
func TestManualRollback(t *testing.T) {
1041+
const updatemarkerwatching456NoRollbackAvailable = `
1042+
version: 4.5.6
1043+
hash: newver
1044+
versioned_home: data/elastic-agent-4.5.6-newver
1045+
updated_on: 2025-07-11T10:11:12.131415Z
1046+
prev_version: 1.2.3
1047+
prev_hash: oldver
1048+
prev_versioned_home: data/elastic-agent-1.2.3-oldver
1049+
acked: false
1050+
action: null
1051+
details:
1052+
target_version: 4.5.6
1053+
state: UPG_WATCHING
1054+
metadata:
1055+
retry_until: null
1056+
desired_outcome: UPGRADE
1057+
`
10411058
const updatemarkerwatching456 = `
10421059
version: 4.5.6
10431060
hash: newver
@@ -1059,6 +1076,7 @@ func TestManualRollback(t *testing.T) {
10591076
valid_until: 2025-07-18T10:11:12.131415Z
10601077
desired_outcome: UPGRADE
10611078
`
1079+
10621080
parsed123Version, err := agtversion.ParseVersion("1.2.3")
10631081
require.NoError(t, err)
10641082
parsed456Version, err := agtversion.ParseVersion("4.5.6")
@@ -1078,19 +1096,40 @@ func TestManualRollback(t *testing.T) {
10781096
versionedHome: "data/elastic-agent-4.5.6-newver",
10791097
}
10801098

1099+
// this is the updated_on timestamp in the example
1100+
nowBeforeTTL, err := time.Parse(time.RFC3339, `2025-07-11T10:11:12Z`)
1101+
require.NoError(t, err, "error parsing nowBeforeTTL")
1102+
1103+
// the update marker yaml assume 7d TLL for rollbacks, let's make an extra day pass
1104+
nowAfterTTL := nowBeforeTTL.Add(8 * 24 * time.Hour)
1105+
10811106
type setupF func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper)
10821107
type postRollbackAssertionsF func(t *testing.T, topDir string)
10831108
type testcase struct {
10841109
name string
10851110
setup setupF
10861111
artifactSettings *artifact.Config
10871112
upgradeSettings *configuration.UpgradeConfig
1113+
now time.Time
10881114
version string
10891115
wantErr assert.ErrorAssertionFunc
10901116
additionalAsserts postRollbackAssertionsF
10911117
}
10921118

10931119
testcases := []testcase{
1120+
{
1121+
name: "no rollback version - rollback fails",
1122+
setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) {
1123+
//do not setup anything here, let the rollback fail
1124+
},
1125+
artifactSettings: artifact.DefaultConfig(),
1126+
upgradeSettings: configuration.DefaultUpgradeConfig(),
1127+
version: "",
1128+
wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
1129+
return assert.ErrorIs(t, err, ErrEmptyRollbackVersion)
1130+
},
1131+
additionalAsserts: nil,
1132+
},
10941133
{
10951134
name: "no update marker - rollback fails",
10961135
setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) {
@@ -1104,6 +1143,85 @@ func TestManualRollback(t *testing.T) {
11041143
},
11051144
additionalAsserts: nil,
11061145
},
1146+
{
1147+
name: "update marker ok but rollback available is empty - error",
1148+
setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) {
1149+
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456NoRollbackAvailable), 0600)
1150+
require.NoError(t, err, "error setting up update marker")
1151+
locker := filelock.NewAppLocker(topDir, "watcher.lock")
1152+
err = locker.TryLock()
1153+
require.NoError(t, err, "error locking initial watcher AppLocker")
1154+
watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil)
1155+
},
1156+
artifactSettings: artifact.DefaultConfig(),
1157+
upgradeSettings: configuration.DefaultUpgradeConfig(),
1158+
version: "2.3.4-unknown",
1159+
wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
1160+
return assert.ErrorIs(t, err, ErrNoRollbacksAvailable)
1161+
},
1162+
additionalAsserts: func(t *testing.T, topDir string) {
1163+
// marker should be untouched
1164+
filePath := markerFilePath(paths.DataFrom(topDir))
1165+
require.FileExists(t, filePath)
1166+
markerFileBytes, readMarkerErr := os.ReadFile(filePath)
1167+
require.NoError(t, readMarkerErr)
1168+
1169+
assert.YAMLEq(t, updatemarkerwatching456NoRollbackAvailable, string(markerFileBytes), "update marker should be untouched")
1170+
},
1171+
},
1172+
{
1173+
name: "update marker ok but version is not available for rollback - error",
1174+
setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) {
1175+
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600)
1176+
require.NoError(t, err, "error setting up update marker")
1177+
locker := filelock.NewAppLocker(topDir, "watcher.lock")
1178+
err = locker.TryLock()
1179+
require.NoError(t, err, "error locking initial watcher AppLocker")
1180+
watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil)
1181+
},
1182+
artifactSettings: artifact.DefaultConfig(),
1183+
upgradeSettings: configuration.DefaultUpgradeConfig(),
1184+
version: "2.3.4-unknown",
1185+
wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
1186+
return assert.ErrorIs(t, err, ErrNoRollbacksAvailable)
1187+
},
1188+
additionalAsserts: func(t *testing.T, topDir string) {
1189+
// marker should be untouched
1190+
filePath := markerFilePath(paths.DataFrom(topDir))
1191+
require.FileExists(t, filePath)
1192+
markerFileBytes, readMarkerErr := os.ReadFile(filePath)
1193+
require.NoError(t, readMarkerErr)
1194+
1195+
assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched")
1196+
},
1197+
},
1198+
{
1199+
name: "update marker ok but rollback is expired - error",
1200+
setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) {
1201+
err := os.WriteFile(markerFilePath(paths.DataFrom(topDir)), []byte(updatemarkerwatching456), 0600)
1202+
require.NoError(t, err, "error setting up update marker")
1203+
locker := filelock.NewAppLocker(topDir, "watcher.lock")
1204+
err = locker.TryLock()
1205+
require.NoError(t, err, "error locking initial watcher AppLocker")
1206+
watcherHelper.EXPECT().TakeOverWatcher(t.Context(), mock.Anything, topDir).Return(locker, nil)
1207+
},
1208+
artifactSettings: artifact.DefaultConfig(),
1209+
upgradeSettings: configuration.DefaultUpgradeConfig(),
1210+
now: nowAfterTTL,
1211+
version: "1.2.3",
1212+
wantErr: func(t assert.TestingT, err error, i ...interface{}) bool {
1213+
return assert.ErrorIs(t, err, ErrNoRollbacksAvailable)
1214+
},
1215+
additionalAsserts: func(t *testing.T, topDir string) {
1216+
// marker should be untouched
1217+
filePath := markerFilePath(paths.DataFrom(topDir))
1218+
require.FileExists(t, filePath)
1219+
markerFileBytes, readMarkerErr := os.ReadFile(filePath)
1220+
require.NoError(t, readMarkerErr)
1221+
1222+
assert.YAMLEq(t, updatemarkerwatching456, string(markerFileBytes), "update marker should be untouched")
1223+
},
1224+
},
11071225
{
11081226
name: "update marker ok - takeover watcher, persist rollback and restart most recent watcher",
11091227
setup: func(t *testing.T, topDir string, agent *infomocks.Agent, watcherHelper *MockWatcherHelper) {
@@ -1119,6 +1237,7 @@ func TestManualRollback(t *testing.T) {
11191237
},
11201238
artifactSettings: artifact.DefaultConfig(),
11211239
upgradeSettings: configuration.DefaultUpgradeConfig(),
1240+
now: nowBeforeTTL,
11221241
version: "1.2.3",
11231242
wantErr: assert.NoError,
11241243
additionalAsserts: func(t *testing.T, topDir string) {
@@ -1148,8 +1267,7 @@ func TestManualRollback(t *testing.T) {
11481267

11491268
upgrader, err := NewUpgrader(log, tc.artifactSettings, tc.upgradeSettings, mockAgentInfo, mockWatcherHelper)
11501269
require.NoError(t, err, "error instantiating upgrader")
1151-
1152-
_, err = upgrader.forceRollbackToPreviousVersion(t.Context(), topDir, tc.version, nil, nil)
1270+
_, err = upgrader.forceRollbackToPreviousVersion(t.Context(), topDir, tc.now, tc.version, nil)
11531271
tc.wantErr(t, err, "unexpected error returned by forceRollbackToPreviousVersion()")
11541272
if tc.additionalAsserts != nil {
11551273
tc.additionalAsserts(t, topDir)

0 commit comments

Comments
 (0)