Skip to content

Commit 3e7324e

Browse files
ci: add integration tests for scheduled upgrade details
1 parent af86790 commit 3e7324e

File tree

4 files changed

+362
-5
lines changed

4 files changed

+362
-5
lines changed

testing/fleetservertest/ackableactions.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@ import (
1111
)
1212

1313
// ActionTmpl carries the values substituted into the action JSON template
// (actionTemplate in checkin.go) when building an action for a check-in
// response.
type ActionTmpl struct {
	// AgentID is the ID of the agent the action is addressed to.
	AgentID string
	// ActionID is rendered as the action's "id".
	ActionID string
	// Data is rendered verbatim (unquoted) as the action's "data" value, so
	// it must already be a valid JSON value.
	Data string
	// Type is rendered as the action's "type".
	Type string

	// StartTime is rendered as the action's "start_time". The tests format
	// it as RFC 3339; it is left empty for unscheduled actions.
	StartTime string
	// Expiration is rendered as the action's "expiration". The tests format
	// it as RFC 3339; it is left empty when the action does not expire.
	Expiration string
}
1921

2022
type CheckinData struct {

testing/fleetservertest/checkin.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,9 @@ const actionTemplate = `{
8787
"data": {{.Data}},
8888
"id": "{{.ActionID}}",
8989
"input_type": "",
90-
"type": "{{.Type}}"
90+
"type": "{{.Type}}",
91+
"start_time": "{{.StartTime}}",
92+
"expiration": "{{.Expiration}}"
9193
}`
9294

9395
func NewAction(data ActionTmpl) (AckableAction, error) {

testing/fleetservertest/models.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"encoding/json"
99
"fmt"
1010
"net/http"
11+
12+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
1113
)
1214

1315
// =============================================================================
@@ -117,6 +119,8 @@ type CheckinRequest struct {
117119

118120
// An optional timeout value that informs fleet-server of when a client will time out on its checkin request. If not specified, fleet-server will use the timeout values specified in the config (defaults to 5m polling and a 10m write timeout). The value, if specified, is expected to be a string that is parsable by [time.ParseDuration](https://pkg.go.dev/time#ParseDuration). If specified, fleet-server will set its poll timeout to `max(1m, poll_timeout-2m)` and its write timeout to `max(2m, poll_timeout-1m)`.
119121
PollTimeout string `json:"poll_timeout,omitempty"`
122+
123+
UpgradeDetails *details.Details `json:"upgrade_details,omitempty"`
120124
}
121125
type CheckinResponse struct {
122126

Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License 2.0;
3+
// you may not use this file except in compliance with the Elastic License 2.0.
4+
5+
//go:build integration
6+
7+
package ess
8+
9+
import (
10+
"context"
11+
"encoding/json"
12+
"fmt"
13+
"net/http"
14+
"sync"
15+
"testing"
16+
"time"
17+
18+
"github.com/stretchr/testify/assert"
19+
"github.com/stretchr/testify/require"
20+
21+
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
22+
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
23+
integrationtest "github.com/elastic/elastic-agent/pkg/testing"
24+
"github.com/elastic/elastic-agent/pkg/testing/define"
25+
"github.com/elastic/elastic-agent/pkg/testing/tools/check"
26+
"github.com/elastic/elastic-agent/pkg/testing/tools/testcontext"
27+
"github.com/elastic/elastic-agent/testing/fleetservertest"
28+
"github.com/elastic/elastic-agent/testing/integration"
29+
)
30+
31+
func TestFleetScheduledUpgrade(t *testing.T) {
32+
_ = define.Require(t, define.Requirements{
33+
Group: integration.Fleet,
34+
Stack: &define.Stack{},
35+
Local: false, // requires Agent installation
36+
Sudo: true, // requires Agent installation
37+
})
38+
39+
ctx, cancel := testcontext.WithTimeout(t, t.Context(), time.Minute*10)
40+
defer cancel()
41+
42+
apiKey, policy := createBasicFleetPolicyData(t, "http://fleet-server:8221")
43+
checkinWithAcker := fleetservertest.NewCheckinActionsWithAcker()
44+
nextActionGenerator := checkinWithAcker.ActionsGenerator()
45+
46+
var checkInRequest struct {
47+
sync.Mutex
48+
updatedTime time.Time
49+
UpgradeDetails *details.Details
50+
}
51+
52+
handlers := &fleetservertest.Handlers{
53+
APIKey: apiKey.Key,
54+
EnrollmentToken: "enrollmentToken",
55+
AgentID: policy.AgentID, // as there is no enroll, the agentID needs to be manually set
56+
CheckinFn: func(ctx context.Context, h *fleetservertest.Handlers, id string, userAgent string,
57+
acceptEncoding string, checkinRequest fleetservertest.CheckinRequest,
58+
) (*fleetservertest.CheckinResponse, *fleetservertest.HTTPError) {
59+
if id != policy.AgentID {
60+
return nil, &fleetservertest.HTTPError{
61+
StatusCode: http.StatusNotFound,
62+
Message: fmt.Sprintf("agent %q not found", id),
63+
}
64+
}
65+
66+
checkInRequest.Lock()
67+
checkInRequest.updatedTime = time.Now()
68+
checkInRequest.UpgradeDetails = checkinRequest.UpgradeDetails
69+
checkInRequest.Unlock()
70+
71+
data, hErr := nextActionGenerator()
72+
if hErr != nil {
73+
return nil, hErr
74+
}
75+
76+
respStr := fleetservertest.NewCheckinResponse(data.AckToken, data.Actions...)
77+
resp := fleetservertest.CheckinResponse{}
78+
err := json.Unmarshal(
79+
[]byte(respStr),
80+
&resp)
81+
if err != nil {
82+
return nil, &fleetservertest.HTTPError{
83+
StatusCode: http.StatusInternalServerError,
84+
Message: fmt.Sprintf("failed to CheckinResponse: %v", err),
85+
}
86+
}
87+
88+
// simulate long poll
89+
time.Sleep(data.Delay)
90+
91+
return &resp, nil
92+
},
93+
EnrollFn: fleetservertest.NewHandlerEnroll(policy.AgentID, policy.PolicyID, apiKey),
94+
AckFn: fleetservertest.NewHandlerAckWithAcker(checkinWithAcker.Acker()),
95+
StatusFn: fleetservertest.NewHandlerStatusHealthy(),
96+
}
97+
98+
fleetServer := fleetservertest.NewServer(handlers, fleetservertest.WithRequestLog(t.Logf))
99+
defer fleetServer.Close()
100+
101+
fixture, err := define.NewFixtureFromLocalBuild(t,
102+
define.Version(),
103+
integrationtest.WithAllowErrors(),
104+
integrationtest.WithLogOutput())
105+
require.NoError(t, err, "SetupTest: NewFixtureFromLocalBuild failed")
106+
err = fixture.EnsurePrepared(ctx)
107+
require.NoError(t, err, "SetupTest: fixture.Prepare failed")
108+
109+
out, err := fixture.Install(
110+
ctx,
111+
&integrationtest.InstallOpts{
112+
Force: true,
113+
NonInteractive: true,
114+
Insecure: true,
115+
Privileged: false,
116+
EnrollOpts: integrationtest.EnrollOpts{
117+
URL: fleetServer.LocalhostURL,
118+
EnrollmentToken: "anythingWillDO",
119+
}})
120+
require.NoErrorf(t, err, "Error when installing agent, output: %s", out)
121+
122+
// Wait for the agent to connect to Fleet and report HEALTHY
123+
check.ConnectedToFleet(ctx, t, fixture, 5*time.Minute)
124+
125+
// Simulate a scheduled upgrade action
126+
targetVersion := "255.0.0"
127+
t.Run("scheduled upgrade action", func(t *testing.T) {
128+
scheduledActionUUID := "scheduled-action-id"
129+
scheduledUpgradeAction, err := fleetservertest.NewAction(fleetservertest.ActionTmpl{
130+
AgentID: policy.AgentID,
131+
ActionID: scheduledActionUUID,
132+
Type: fleetapi.ActionTypeUpgrade,
133+
Data: fmt.Sprintf(`{"version": "%s"}`, targetVersion),
134+
StartTime: time.Now().Add(time.Hour).Format(time.RFC3339),
135+
})
136+
require.NoError(t, err, "failed to create scheduled upgrade action")
137+
checkinWithAcker.AddCheckin("token", 1*time.Second, scheduledUpgradeAction)
138+
139+
// Wait and check that elastic-agent has reported the scheduled upgrade
140+
// in the upgrade details
141+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
142+
checkInRequest.Lock()
143+
defer checkInRequest.Unlock()
144+
if !assert.NotNil(collect, checkInRequest.UpgradeDetails) {
145+
return
146+
}
147+
assert.Equal(collect, targetVersion, checkInRequest.UpgradeDetails.TargetVersion)
148+
assert.EqualValues(collect, details.StateScheduled, checkInRequest.UpgradeDetails.State)
149+
assert.Equal(collect, scheduledActionUUID, checkInRequest.UpgradeDetails.ActionID)
150+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report scheduled upgrade")
151+
152+
// Deliberately restart elastic-agent to check that it still reports
153+
// correctly the scheduled upgrade details
154+
restartAgentNTimes(t, 3, 300*time.Millisecond)
155+
156+
// Wait and check that elastic-agent has a more recent checkin with
157+
// the correct upgrade details
158+
timeSnapshot := time.Now()
159+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
160+
checkInRequest.Lock()
161+
defer checkInRequest.Unlock()
162+
if !assert.NotNil(collect, checkInRequest.UpgradeDetails) {
163+
return
164+
}
165+
assert.Less(collect, timeSnapshot, checkInRequest.updatedTime)
166+
assert.Equal(collect, targetVersion, checkInRequest.UpgradeDetails.TargetVersion)
167+
assert.EqualValues(collect, details.StateScheduled, checkInRequest.UpgradeDetails.State)
168+
assert.Equal(collect, scheduledActionUUID, checkInRequest.UpgradeDetails.ActionID)
169+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report scheduled upgrade after restart")
170+
171+
// Simulate a cancel action of the scheduled upgrade
172+
cancelActionUUID := "cancel-action-id"
173+
cancelAction, err := fleetservertest.NewAction(fleetservertest.ActionTmpl{
174+
AgentID: policy.AgentID,
175+
Type: fleetapi.ActionTypeCancel,
176+
ActionID: cancelActionUUID,
177+
Data: fmt.Sprintf(`{"target_id": "%s"}`, scheduledActionUUID),
178+
})
179+
checkinWithAcker.AddCheckin("token", 1*time.Second, cancelAction)
180+
181+
// Wait and check that elastic-agent has reported a more recent checkin
182+
// with empty upgrade details
183+
timeSnapshot = time.Now()
184+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
185+
checkInRequest.Lock()
186+
defer checkInRequest.Unlock()
187+
assert.Less(collect, timeSnapshot, checkInRequest.updatedTime)
188+
assert.Nil(collect, checkInRequest.UpgradeDetails)
189+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report empty upgrade details after cancel")
190+
})
191+
192+
t.Run("expired scheduled upgrade action", func(t *testing.T) {
193+
scheduledExpiredActionUUID := "expired-scheduled-action-id"
194+
scheduledExpiredUpgradeAction, err := fleetservertest.NewAction(fleetservertest.ActionTmpl{
195+
AgentID: policy.AgentID,
196+
ActionID: scheduledExpiredActionUUID,
197+
Type: fleetapi.ActionTypeUpgrade,
198+
Data: fmt.Sprintf(`{"version": "%s"}`, targetVersion),
199+
StartTime: time.Now().Add(-time.Hour).Format(time.RFC3339),
200+
Expiration: time.Now().Add(-time.Hour).Format(time.RFC3339),
201+
})
202+
require.NoError(t, err, "failed to create expired scheduled upgrade action")
203+
checkinWithAcker.AddCheckin("token", 1*time.Second, scheduledExpiredUpgradeAction)
204+
205+
// Wait and check that elastic-agent has reported the expired scheduled upgrade
206+
// in the upgrade details
207+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
208+
checkInRequest.Lock()
209+
defer checkInRequest.Unlock()
210+
if !assert.NotNil(collect, checkInRequest.UpgradeDetails) {
211+
return
212+
}
213+
assert.Equal(collect, targetVersion, checkInRequest.UpgradeDetails.TargetVersion)
214+
assert.EqualValues(collect, details.StateFailed, checkInRequest.UpgradeDetails.State)
215+
assert.Equal(collect, scheduledExpiredActionUUID, checkInRequest.UpgradeDetails.ActionID)
216+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report expired scheduled upgrade")
217+
218+
// Deliberately restart elastic-agent to check that it still reports
219+
// correctly the expired scheduled upgrade details
220+
restartAgentNTimes(t, 3, 300*time.Millisecond)
221+
222+
// Wait and check that elastic-agent has a more recent checkin with
223+
// the correct upgrade details
224+
timeSnapshot := time.Now()
225+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
226+
checkInRequest.Lock()
227+
defer checkInRequest.Unlock()
228+
if !assert.NotNil(collect, checkInRequest.UpgradeDetails) {
229+
return
230+
}
231+
assert.Less(collect, timeSnapshot, checkInRequest.updatedTime)
232+
assert.Equal(collect, targetVersion, checkInRequest.UpgradeDetails.TargetVersion)
233+
assert.EqualValues(collect, details.StateFailed, checkInRequest.UpgradeDetails.State)
234+
assert.Equal(collect, scheduledExpiredActionUUID, checkInRequest.UpgradeDetails.ActionID)
235+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report expired scheduled upgrade after restart")
236+
237+
// Simulate a cancel action of the scheduled upgrade
238+
cancelExpiredActionUUID := "cancel-expired-action-id"
239+
cancelExpiredAction, err := fleetservertest.NewAction(fleetservertest.ActionTmpl{
240+
AgentID: policy.AgentID,
241+
Type: fleetapi.ActionTypeCancel,
242+
ActionID: cancelExpiredActionUUID,
243+
Data: fmt.Sprintf(`{"target_id": "%s"}`, scheduledExpiredActionUUID),
244+
})
245+
checkinWithAcker.AddCheckin("token", 1*time.Second, cancelExpiredAction)
246+
247+
// Wait and check that elastic-agent has reported a more recent checkin
248+
// with empty upgrade details
249+
timeSnapshot = time.Now()
250+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
251+
checkInRequest.Lock()
252+
defer checkInRequest.Unlock()
253+
assert.Less(collect, timeSnapshot, checkInRequest.updatedTime)
254+
assert.Nil(collect, checkInRequest.UpgradeDetails)
255+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report empty upgrade details after cancel")
256+
})
257+
258+
t.Run("initially expired scheduled upgrade action receive new upgrade action", func(t *testing.T) {
259+
scheduledExpiredActionUUID := "expired-scheduled-action-id"
260+
scheduledExpiredUpgradeAction, err := fleetservertest.NewAction(fleetservertest.ActionTmpl{
261+
AgentID: policy.AgentID,
262+
ActionID: scheduledExpiredActionUUID,
263+
Type: fleetapi.ActionTypeUpgrade,
264+
Data: fmt.Sprintf(`{"version": "%s"}`, targetVersion),
265+
StartTime: time.Now().Add(-time.Hour).Format(time.RFC3339),
266+
Expiration: time.Now().Add(-time.Hour).Format(time.RFC3339),
267+
})
268+
require.NoError(t, err, "failed to create expired scheduled upgrade action")
269+
checkinWithAcker.AddCheckin("token", 1*time.Second, scheduledExpiredUpgradeAction)
270+
271+
// Wait and check that elastic-agent has reported the expired scheduled upgrade
272+
// in the upgrade details
273+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
274+
checkInRequest.Lock()
275+
defer checkInRequest.Unlock()
276+
if !assert.NotNil(collect, checkInRequest.UpgradeDetails) {
277+
return
278+
}
279+
assert.Equal(collect, targetVersion, checkInRequest.UpgradeDetails.TargetVersion)
280+
assert.EqualValues(collect, details.StateFailed, checkInRequest.UpgradeDetails.State)
281+
assert.Equal(collect, scheduledExpiredActionUUID, checkInRequest.UpgradeDetails.ActionID)
282+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report expired scheduled upgrade")
283+
284+
// send a new scheduled action through the checkin
285+
scheduledActionUUID := "scheduled-action-id"
286+
scheduledUpgradeAction, err := fleetservertest.NewAction(fleetservertest.ActionTmpl{
287+
AgentID: policy.AgentID,
288+
ActionID: scheduledActionUUID,
289+
Type: fleetapi.ActionTypeUpgrade,
290+
Data: fmt.Sprintf(`{"version": "%s"}`, targetVersion),
291+
StartTime: time.Now().Add(time.Hour).Format(time.RFC3339),
292+
})
293+
require.NoError(t, err, "failed to create scheduled upgrade action")
294+
checkinWithAcker.AddCheckin("token", 1*time.Second, scheduledUpgradeAction)
295+
296+
// Wait and check that elastic-agent has reported the scheduled upgrade
297+
// in the upgrade details
298+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
299+
checkInRequest.Lock()
300+
defer checkInRequest.Unlock()
301+
if !assert.NotNil(collect, checkInRequest.UpgradeDetails) {
302+
return
303+
}
304+
assert.Equal(collect, targetVersion, checkInRequest.UpgradeDetails.TargetVersion)
305+
assert.EqualValues(collect, details.StateScheduled, checkInRequest.UpgradeDetails.State)
306+
assert.Equal(collect, scheduledActionUUID, checkInRequest.UpgradeDetails.ActionID)
307+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report scheduled upgrade")
308+
309+
// Deliberately restart elastic-agent to check that it still reports
310+
// correctly the scheduled upgrade details
311+
restartAgentNTimes(t, 3, 300*time.Millisecond)
312+
313+
// Wait and check that elastic-agent has a more recent checkin with
314+
// the correct upgrade details
315+
timeSnapshot := time.Now()
316+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
317+
checkInRequest.Lock()
318+
defer checkInRequest.Unlock()
319+
if !assert.NotNil(collect, checkInRequest.UpgradeDetails) {
320+
return
321+
}
322+
assert.Less(collect, timeSnapshot, checkInRequest.updatedTime)
323+
assert.Equal(collect, targetVersion, checkInRequest.UpgradeDetails.TargetVersion)
324+
assert.EqualValues(collect, details.StateScheduled, checkInRequest.UpgradeDetails.State)
325+
assert.Equal(collect, scheduledActionUUID, checkInRequest.UpgradeDetails.ActionID)
326+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report scheduled upgrade after restart")
327+
328+
// Simulate a cancel action of the scheduled upgrade
329+
cancelActionUUID := "cancel-action-id"
330+
cancelAction, err := fleetservertest.NewAction(fleetservertest.ActionTmpl{
331+
AgentID: policy.AgentID,
332+
Type: fleetapi.ActionTypeCancel,
333+
ActionID: cancelActionUUID,
334+
Data: fmt.Sprintf(`{"target_id": "%s"}`, scheduledActionUUID),
335+
})
336+
checkinWithAcker.AddCheckin("token", 1*time.Second, cancelAction)
337+
338+
// Wait and check that elastic-agent has reported a more recent checkin
339+
// with empty upgrade details
340+
timeSnapshot = time.Now()
341+
require.EventuallyWithT(t, func(collect *assert.CollectT) {
342+
checkInRequest.Lock()
343+
defer checkInRequest.Unlock()
344+
assert.Less(collect, timeSnapshot, checkInRequest.updatedTime)
345+
assert.Nil(collect, checkInRequest.UpgradeDetails)
346+
}, 5*time.Minute, 500*time.Millisecond, "agent did not report empty upgrade details after cancel")
347+
})
348+
349+
}

0 commit comments

Comments
 (0)