Skip to content
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: enhancement

# Change summary; a 80ish characters long description of the change.
summary: Clear agent.upgrade_attempts when upgrade is complete.

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: |
The new AutomaticAgentUpgradeTask Kibana task sets the upgrade_attempts property in agents it upgrades.
This property is used to track upgrade retries and should therefore be cleared when the upgrade is complete.

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: fleet-server

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/fleet-server/pull/4528

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
# issue:
48 changes: 31 additions & 17 deletions internal/pkg/api/handleCheckin.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ func (ct *CheckinT) validateRequest(zlog zerolog.Logger, w http.ResponseWriter,
}

wTime := pollDuration + time.Minute
rc := http.NewResponseController(w) //nolint:bodyclose // we are working with a ResponseWriter not a Respons
rc := http.NewResponseController(w)
if err := rc.SetWriteDeadline(start.Add(wTime)); err != nil {
zlog.Warn().Err(err).Time("write_deadline", start.Add(wTime)).Msg("Unable to set checkin write deadline.")
} else {
Expand Down Expand Up @@ -398,6 +398,26 @@ func (ct *CheckinT) ProcessRequest(zlog zerolog.Logger, w http.ResponseWriter, r
return ct.writeResponse(zlog, w, r, agent, resp)
}

func (ct *CheckinT) verifyActionExists(vCtx context.Context, vSpan *apm.Span, agent *model.Agent, details *UpgradeDetails) (*model.Action, error) {
action, ok := ct.cache.GetAction(details.ActionId)
if !ok {
actions, err := dl.FindAction(vCtx, ct.bulker, details.ActionId)
if err != nil {
vSpan.End()
return nil, fmt.Errorf("unable to find upgrade_details action: %w", err)
}
if len(actions) == 0 {
vSpan.End()
zerolog.Ctx(vCtx).Warn().Msgf("upgrade_details no action for id %q found (agent id %q)", details.ActionId, agent.Agent.ID)
return nil, nil
}
action = actions[0]
ct.cache.SetAction(action)
}
vSpan.End()
return &action, nil
}

// processUpgradeDetails will verify and set the upgrade_details section of an agent document based on checkin value.
// if the agent doc and checkin details are both nil the method is a nop
// if the checkin upgrade_details is nil but there was a previous value in the agent doc, fleet-server treats it as a successful upgrade
Expand All @@ -412,24 +432,14 @@ func (ct *CheckinT) processUpgradeDetails(ctx context.Context, agent *model.Agen
}
// update docs with in progress details

// verify action exists
vSpan, vCtx := apm.StartSpan(ctx, "Check update action", "validate")
action, ok := ct.cache.GetAction(details.ActionId)
if !ok {
actions, err := dl.FindAction(vCtx, ct.bulker, details.ActionId)
if err != nil {
vSpan.End()
return fmt.Errorf("unable to find upgrade_details action: %w", err)
}
if len(actions) == 0 {
vSpan.End()
zerolog.Ctx(vCtx).Warn().Msgf("upgrade_details no action for id %q found (agent id %q)", details.ActionId, agent.Agent.ID)
return nil
}
action = actions[0]
ct.cache.SetAction(action)
action, err := ct.verifyActionExists(ctx, vSpan, agent, details)
if err != nil {
return err
}
if action == nil {
return nil
}
vSpan.End()

// link action with APM spans
var links []apm.SpanLink
Expand Down Expand Up @@ -500,6 +510,10 @@ func (ct *CheckinT) processUpgradeDetails(ctx context.Context, agent *model.Agen
doc := bulk.UpdateFields{
dl.FieldUpgradeDetails: details,
}
if agent.UpgradeAttempts != nil && details.State == UpgradeDetailsStateUPGWATCHING {
doc[dl.FieldUpgradeAttempts] = nil
}

body, err := doc.Marshal()
if err != nil {
return err
Expand Down
30 changes: 30 additions & 0 deletions internal/pkg/api/handleCheckin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"testing"
"time"

"github.com/elastic/fleet-server/v7/internal/pkg/bulk"
"github.com/elastic/fleet-server/v7/internal/pkg/cache"
"github.com/elastic/fleet-server/v7/internal/pkg/checkin"
"github.com/elastic/fleet-server/v7/internal/pkg/config"
Expand Down Expand Up @@ -312,6 +313,17 @@ func TestResolveSeqNo(t *testing.T) {

func TestProcessUpgradeDetails(t *testing.T) {
esd := model.ESDocument{Id: "doc-ID"}
doc := bulk.UpdateFields{
dl.FieldUpgradeDetails: &UpgradeDetails{
ActionId: "test-action",
State: UpgradeDetailsStateUPGWATCHING,
},
dl.FieldUpgradeAttempts: nil,
}
body, err := doc.Marshal()
if err != nil {
t.Fatalf("marshal error: %v", err)
}
tests := []struct {
name string
agent *model.Agent
Expand Down Expand Up @@ -625,6 +637,24 @@ func TestProcessUpgradeDetails(t *testing.T) {
return mCache
},
err: ErrInvalidUpgradeMetadata,
}, {
name: "clear upgrade attempts when watching",
agent: &model.Agent{ESDocument: esd, Agent: &model.AgentMetadata{ID: "test-agent"}, UpgradeAttempts: make([]string, 0)},
details: &UpgradeDetails{
ActionId: "test-action",
State: UpgradeDetailsStateUPGWATCHING,
},
bulk: func() *ftesting.MockBulk {
mBulk := ftesting.NewMockBulk()
mBulk.On("Update", mock.Anything, dl.FleetAgents, "doc-ID", body, mock.Anything, mock.Anything).Return(nil)
return mBulk
},
cache: func() *testcache.MockCache {
mCache := testcache.NewMockCache()
mCache.On("GetAction", "test-action").Return(model.Action{}, true)
return mCache
},
err: nil,
}}

for _, tc := range tests {
Expand Down
1 change: 1 addition & 0 deletions internal/pkg/dl/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const (
FieldUpgradeStartedAt = "upgrade_started_at"
FieldUpgradeStatus = "upgrade_status"
FieldUpgradeDetails = "upgrade_details"
FieldUpgradeAttempts = "upgrade_attempts"

FieldAuditUnenrolledTime = "audit_unenrolled_time"
FieldAuditUnenrolledReason = "audit_unenrolled_reason"
Expand Down
3 changes: 3 additions & 0 deletions internal/pkg/model/schema.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions model/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,13 @@
"description": "Additional upgrade status details.",
"type": "object"
},
"upgrade_attempts": {
"description": "List of timestamps of attempts of Elastic Agent automatic upgrades",
"type": "array",
"items": {
"type": "string"
}
},
"replace_token": {
"description": "hash of token provided during enrollment that allows replacement by another enrollment with same ID",
"type": "string"
Expand Down
Loading