Skip to content

Commit 17eed84

Browse files
[8.18] (backport #9562) fix: scheduled upgrade details state (#9667)
* fix: scheduled upgrade details state (#9562)

* fix: persisting and reporting of upgrade details
* ci: align and extend dispatcher unit-tests
* ci: update coordinator and application new signatures in unit-tests
* ci: add integration tests for scheduled upgrade details
* doc: add changelog fragment
* doc: reword existing and add more comments in code
* feat: change queuedUpgradeActions inside dispatchCancelActions to have values of struct{}
* fix: remove redundant continue
* fix: dedupe upgrade actions from fleetgateway actions, handle correctly the expiration of retried stored actions, and update upgrade details on retries

(cherry picked from commit ff80471)

# Conflicts:
#   internal/pkg/agent/application/application.go
#   internal/pkg/agent/application/coordinator/coordinator.go
#   internal/pkg/agent/cmd/run.go

* fix: resolve conflicts
* fix: define missing test helper func
* fix: ignore QF1007 merge conditional assignment into variable declaration

---------

Co-authored-by: Panos Koutsovasilis <[email protected]>
1 parent bc5e95a commit 17eed84

File tree

15 files changed

+1148
-305
lines changed

15 files changed

+1148
-305
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
kind: bug-fix
2+
summary: fix reporting of scheduled upgrade details across restarts and cancels
3+
component: elastic-agent
4+
pr: https://github.com/elastic/elastic-agent/pull/9562
5+
issue: https://github.com/elastic/elastic-agent/issues/8778

internal/pkg/agent/application/actions/handlers/handler_action_upgrade_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ func TestUpgradeHandler(t *testing.T) {
115115
return nil, nil
116116
},
117117
},
118-
nil, nil, nil, nil, nil, false, nil, nil)
118+
nil, nil, nil, nil, nil, false, nil, nil, nil)
119119
//nolint:errcheck // We don't need the termination state of the Coordinator
120120
go c.Run(ctx)
121121

@@ -174,7 +174,7 @@ func TestUpgradeHandlerSameVersion(t *testing.T) {
174174
return nil, err
175175
},
176176
},
177-
nil, nil, nil, nil, nil, false, nil, nil)
177+
nil, nil, nil, nil, nil, false, nil, nil, nil)
178178
//nolint:errcheck // We don't need the termination state of the Coordinator
179179
go c.Run(ctx)
180180

@@ -233,7 +233,7 @@ func TestDuplicateActionsHandled(t *testing.T) {
233233
return nil, nil
234234
},
235235
},
236-
nil, nil, nil, nil, nil, false, nil, acker)
236+
nil, nil, nil, nil, nil, false, nil, acker, nil)
237237
//nolint:errcheck // We don't need the termination state of the Coordinator
238238
go c.Run(ctx)
239239

@@ -327,7 +327,7 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
327327
return nil, nil
328328
},
329329
},
330-
nil, nil, nil, nil, nil, false, nil, nil)
330+
nil, nil, nil, nil, nil, false, nil, nil, nil)
331331
//nolint:errcheck // We don't need the termination state of the Coordinator
332332
go c.Run(ctx)
333333

internal/pkg/agent/application/application.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ import (
1414
"github.com/elastic/elastic-agent-libs/logp"
1515

1616
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
17+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/dispatcher"
1718
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
1819
"github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring"
1920
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
2021
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade"
22+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
2123
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
2224
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
2325
"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
@@ -32,6 +34,7 @@ import (
3234
"github.com/elastic/elastic-agent/internal/pkg/fleetapi/acker/retrier"
3335
fleetclient "github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
3436
otelmanager "github.com/elastic/elastic-agent/internal/pkg/otel/manager"
37+
"github.com/elastic/elastic-agent/internal/pkg/queue"
3538
"github.com/elastic/elastic-agent/internal/pkg/release"
3639
"github.com/elastic/elastic-agent/pkg/component"
3740
"github.com/elastic/elastic-agent/pkg/component/runtime"
@@ -57,6 +60,7 @@ func New(
5760
fleetInitTimeout time.Duration,
5861
disableMonitoring bool,
5962
override CfgOverrider,
63+
initialUpgradeDetails *details.Details,
6064
modifiers ...component.PlatformModifier,
6165
) (*coordinator.Coordinator, coordinator.ConfigManager, composable.Controller, error) {
6266

@@ -141,7 +145,6 @@ func New(
141145
var compModifiers = []coordinator.ComponentsModifier{InjectAPMConfig}
142146
var composableManaged bool
143147
var isManaged bool
144-
145148
var actionAcker acker.Acker
146149
if testingMode {
147150
log.Info("Elastic Agent has been started in testing mode and is managed through the control protocol")
@@ -210,8 +213,19 @@ func New(
210213
batchedAcker := lazy.NewAcker(fleetAcker, log, lazy.WithRetrier(retrier))
211214
actionAcker = stateStore.NewStateStoreActionAcker(batchedAcker, stateStorage)
212215

216+
actionQueue, err := queue.NewActionQueue(stateStorage.Queue(), stateStorage)
217+
if err != nil {
218+
return nil, nil, nil, fmt.Errorf("unable to initialize action queue: %w", err)
219+
}
220+
221+
if initialUpgradeDetails == nil {
222+
// initial upgrade details are nil (normally the caller supplies the ones from the marker file at this point),
223+
// hence, extract any scheduled upgrade details from the action queue.
224+
initialUpgradeDetails = dispatcher.GetScheduledUpgradeDetails(log, actionQueue.Actions(), time.Now())
225+
}
226+
213227
// TODO: stop using global state
214-
managed, err = newManagedConfigManager(ctx, log, agentInfo, cfg, store, runtime, fleetInitTimeout, paths.Top(), client, fleetAcker, actionAcker, retrier, stateStorage, upgrader)
228+
managed, err = newManagedConfigManager(ctx, log, agentInfo, cfg, store, runtime, fleetInitTimeout, paths.Top(), client, fleetAcker, actionAcker, retrier, stateStorage, actionQueue, upgrader)
215229
if err != nil {
216230
return nil, nil, nil, err
217231
}
@@ -225,7 +239,7 @@ func New(
225239
}
226240

227241
otelManager := otelmanager.NewOTelManager(log.Named("otel_manager"))
228-
coord := coordinator.New(log, cfg, logLevel, agentInfo, specs, reexec, upgrader, runtime, configMgr, varsManager, caps, monitor, isManaged, otelManager, actionAcker, compModifiers...)
242+
coord := coordinator.New(log, cfg, logLevel, agentInfo, specs, reexec, upgrader, runtime, configMgr, varsManager, caps, monitor, isManaged, otelManager, actionAcker, initialUpgradeDetails, compModifiers...)
229243
if managed != nil {
230244
// the coordinator requires the config manager as well as in managed-mode the config manager requires the
231245
// coordinator, so it must be set here once the coordinator is created

internal/pkg/agent/application/application_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ func TestLimitsLog(t *testing.T) {
6464
time.Millisecond, // fleetInitTimeout
6565
true, // disable monitoring
6666
nil, // no configuration overrides
67+
nil,
6768
)
6869
require.NoError(t, err)
6970

internal/pkg/agent/application/coordinator/coordinator.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,25 @@ type UpdateComponentChange struct {
368368
}
369369

370370
// New creates a new coordinator.
371-
func New(logger *logger.Logger, cfg *configuration.Configuration, logLevel logp.Level, agentInfo info.Agent, specs component.RuntimeSpecs, reexecMgr ReExecManager, upgradeMgr UpgradeManager, runtimeMgr RuntimeManager, configMgr ConfigManager, varsMgr VarsManager, caps capabilities.Capabilities, monitorMgr MonitorManager, isManaged bool, otelMgr OTelManager, fleetAcker acker.Acker, modifiers ...ComponentsModifier) *Coordinator {
371+
func New(
372+
logger *logger.Logger,
373+
cfg *configuration.Configuration,
374+
logLevel logp.Level,
375+
agentInfo info.Agent,
376+
specs component.RuntimeSpecs,
377+
reexecMgr ReExecManager,
378+
upgradeMgr UpgradeManager,
379+
runtimeMgr RuntimeManager,
380+
configMgr ConfigManager,
381+
varsMgr VarsManager,
382+
caps capabilities.Capabilities,
383+
monitorMgr MonitorManager,
384+
isManaged bool,
385+
otelMgr OTelManager,
386+
fleetAcker acker.Acker,
387+
initialUpgradeDetails *details.Details,
388+
modifiers ...ComponentsModifier,
389+
) *Coordinator {
372390
var fleetState cproto.State
373391
var fleetMessage string
374392
if !isManaged {
@@ -377,11 +395,12 @@ func New(logger *logger.Logger, cfg *configuration.Configuration, logLevel logp.
377395
fleetMessage = "Not enrolled into Fleet"
378396
}
379397
state := State{
380-
State: agentclient.Starting,
381-
Message: "Starting",
382-
FleetState: fleetState,
383-
FleetMessage: fleetMessage,
384-
LogLevel: logLevel,
398+
State: agentclient.Starting,
399+
Message: "Starting",
400+
FleetState: fleetState,
401+
FleetMessage: fleetMessage,
402+
LogLevel: logLevel,
403+
UpgradeDetails: initialUpgradeDetails,
385404
}
386405
c := &Coordinator{
387406
logger: logger,

internal/pkg/agent/application/coordinator/coordinator_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1096,7 +1096,7 @@ func createCoordinator(t testing.TB, ctx context.Context, opts ...CoordinatorOpt
10961096
acker = &fakeActionAcker{}
10971097
}
10981098

1099-
coord := New(l, nil, logp.DebugLevel, ai, specs, &fakeReExecManager{}, upgradeManager, rm, cfgMgr, varsMgr, caps, monitoringMgr, o.managed, otelMgr, acker)
1099+
coord := New(l, nil, logp.DebugLevel, ai, specs, &fakeReExecManager{}, upgradeManager, rm, cfgMgr, varsMgr, caps, monitoringMgr, o.managed, otelMgr, acker, nil)
11001100
return coord, cfgMgr, varsMgr
11011101
}
11021102

0 commit comments

Comments
 (0)