Skip to content

Commit 6ffd60c

Browse files
authored
Reduce integration tests flakiness by adding retries (#26)
1 parent e8ee4e7 commit 6ffd60c

File tree

2 files changed

+81
-10
lines changed

2 files changed

+81
-10
lines changed

test/infra/http_utils.go

Lines changed: 35 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,9 @@ import (
99
"io"
1010
"net/http"
1111
"net/url"
12+
"slices"
1213
"strings"
14+
"time"
1315

1416
"github.com/stretchr/testify/require"
1517
)
@@ -41,8 +43,11 @@ type HttpResponse struct {
4143
}
4244

4345
type HttpExecutor struct {
44-
it *Test
45-
request *http.Request
46+
it *Test
47+
request *http.Request
48+
retryOnStatuses []int
49+
retryBackoff time.Duration
50+
retryTimeout time.Duration
4651
}
4752

4853
func (h *HttpRequest) getUrl(endpoint string) string {
@@ -109,20 +114,43 @@ func (h *HttpRequest) executor(req *http.Request) *HttpExecutor {
109114
}
110115
}
111116

117+
func (h *HttpExecutor) WithRetries(backoff, timeout time.Duration, statuses ...int) *HttpExecutor {
118+
h.retryBackoff = backoff
119+
h.retryTimeout = timeout
120+
h.retryOnStatuses = statuses
121+
return h
122+
}
123+
112124
func (h *HttpExecutor) Do() *HttpResponse {
113-
resp, err := http.DefaultClient.Do(h.request)
125+
resp, err := h.doWithRetries()
114126
require.NoError(h.it, err)
115-
return &HttpResponse{
116-
it: h.it,
117-
response: resp,
118-
}
127+
return resp
119128
}
120129

121130
func (h *HttpExecutor) DoAndCaptureError() (*HttpResponse, error) {
131+
return h.doWithRetries()
132+
}
133+
134+
func (h *HttpExecutor) doWithRetries() (*HttpResponse, error) {
135+
start := time.Now()
136+
122137
resp, err := http.DefaultClient.Do(h.request)
123138
if err != nil {
124139
return nil, err
125140
}
141+
142+
elapsed := time.Since(start)
143+
backoff := max(250*time.Millisecond, h.retryBackoff)
144+
145+
for slices.Index(h.retryOnStatuses, resp.StatusCode) > -1 && elapsed < h.retryTimeout {
146+
time.Sleep(backoff)
147+
resp, err = http.DefaultClient.Do(h.request)
148+
if err != nil {
149+
return nil, err
150+
}
151+
elapsed = time.Since(start)
152+
}
153+
126154
return &HttpResponse{
127155
it: h.it,
128156
response: resp,

test/infra/itest_runner.go

Lines changed: 46 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ import (
99
"net/http"
1010
"os"
1111
"path"
12+
"strings"
1213
"testing"
1314
"time"
1415

@@ -44,7 +45,15 @@ type Test struct {
4445
t *testing.T
4546
}
4647

47-
const requestTimeout = 5 * time.Second
48+
const (
49+
requestTimeout = 5 * time.Second
50+
defaultRetryBackoff = 250 * time.Millisecond
51+
defaultRetryTimeout = 2 * time.Second
52+
)
53+
54+
var defaultRetryCommandOnErrors = []string{
55+
"returned an unexpected status code 502",
56+
}
4857

4958
var runPlugin = plugins.RunCliWithPlugin(getApp())
5059

@@ -159,12 +168,43 @@ func (it *Test) PrepareWorkerTestDir() (string, string) {
159168
}
160169

161170
func (it *Test) RunCommand(args ...string) error {
171+
return it.RetryCommand(args, defaultRetryBackoff, defaultRetryTimeout, defaultRetryCommandOnErrors)
172+
}
173+
174+
func (it *Test) RetryCommand(args []string, backoff time.Duration, timeout time.Duration, onErrorContaining []string) error {
162175
oldArgs := os.Args
163176
defer func() {
164177
os.Args = oldArgs
165178
}()
166179
os.Args = args
167-
return runPlugin()
180+
181+
start := time.Now()
182+
err := runPlugin()
183+
elapsed := time.Since(start)
184+
185+
waitDuration := max(backoff, 250*time.Millisecond)
186+
187+
for shouldRetryCommandOnError(err, onErrorContaining) && elapsed < timeout {
188+
time.Sleep(waitDuration)
189+
err = runPlugin()
190+
elapsed = time.Since(start)
191+
}
192+
193+
return err
194+
}
195+
196+
func shouldRetryCommandOnError(err error, onErrorContaining []string) bool {
197+
if err == nil || len(onErrorContaining) == 0 {
198+
return false
199+
}
200+
201+
for _, s := range onErrorContaining {
202+
if strings.Contains(err.Error(), s) {
203+
return true
204+
}
205+
}
206+
207+
return false
168208
}
169209

170210
func (it *Test) CapturedOutput() []byte {
@@ -185,6 +225,7 @@ func (it *Test) GetAllWorkers() []*model.WorkerDetails {
185225
it.NewHttpRequestWithContext(ctx).
186226
WithAccessToken().
187227
Get("/worker/api/v1/workers").
228+
WithRetries(defaultRetryBackoff, defaultRetryTimeout, http.StatusBadGateway).
188229
Do().
189230
AsObject(&response)
190231

@@ -204,6 +245,7 @@ func (it *Test) CreateWorker(createRequest *model.WorkerDetails) {
204245
WithAccessToken().
205246
WithJsonBytes(jsonBytes).
206247
Post("/worker/api/v1/workers").
248+
WithRetries(defaultRetryBackoff, defaultRetryTimeout, http.StatusBadGateway).
207249
Do().
208250
IsCreated()
209251
}
@@ -216,7 +258,8 @@ func (it *Test) DeleteWorker(workerKey string) {
216258

217259
status := it.NewHttpRequestWithContext(ctx).
218260
WithAccessToken().
219-
Delete("/worker/api/v1/workers/" + workerKey).
261+
Delete("/worker/api/v1/workers/"+workerKey).
262+
WithRetries(defaultRetryBackoff, defaultRetryTimeout, http.StatusBadGateway).
220263
Do().
221264
StatusCode()
222265

0 commit comments

Comments
 (0)