Skip to content

Commit 2bbe5d7

Browse files
Abort tests in case of go-vcr failure (Azure#5067)
* Abort tests if go-vcr fails * Limit the number of response replays * Only replay GETs for terminal states * Don't require sequence match * Update tests * Tweak decisions about cachability * Add documentation * Refactor handling of resource status * Refactor terminal state detection * Increase limit for GET replay * Effectively unlimited GET replays * Fix lint issue
1 parent 498b1df commit 2bbe5d7

File tree

4 files changed

+206
-30
lines changed

4 files changed

+206
-30
lines changed

v2/internal/testcommon/vcr/v3/replay_roundtripper.go

Lines changed: 136 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@ package v3
88
import (
99
"bytes"
1010
"crypto/sha256"
11+
"encoding/json"
1112
"errors"
1213
"fmt"
1314
"io"
1415
"net/http"
16+
"strings"
1517
"sync"
1618

1719
"github.com/go-logr/logr"
@@ -30,13 +32,34 @@ import (
3032
// the original, which a GET then shows is complete.
3133
type replayRoundTripper struct {
3234
inner http.RoundTripper
33-
gets map[string]*http.Response
34-
puts map[string]*http.Response
35+
gets map[string]*replayResponse
36+
puts map[string]*replayResponse
3537
log logr.Logger
3638
padlock sync.Mutex
3739
redactor *vcr.Redactor
3840
}
3941

42+
type replayResponse struct {
43+
response *http.Response
44+
remainingReplays int
45+
}
46+
47+
const (
48+
// Timing variations during test replay may result in a resource being reconciled an additional time - we don't
49+
// want to fail the test in this situation because we've already successfully achieved our goal state.
50+
// Thus we allow the last PUT for each resource to be replayed one extra time.
51+
maxPutReplays = 1 // Maximum number of times to replay a PUT request
52+
53+
// GET requests may be replayed multiple times to allow multiple reconciles to observe the same stable final state.
54+
// We set this to accommodate timing variations during test replay, while avoiding unbounded replays as they might
55+
// result in a test getting stuck and continuing to run until the entire test suite times out.
56+
//
57+
// Currently we need to set this to an apparently comical limit because some of our tests requiring many many
58+
// repetitions to work due to changes in timing used during test replay. Once we've addressed other issues causing
59+
// test instability, we should be able to reduce this limit significantly.
60+
maxGetReplays = 1000 // Maximum number of times to replay a GET request (effectively unlimited for now)
61+
)
62+
4063
var _ http.RoundTripper = &replayRoundTripper{}
4164

4265
// newReplayRoundTripper creates a new replayRoundTripper that will replay selected requests to improve test resilience.
@@ -47,8 +70,8 @@ func NewReplayRoundTripper(
4770
) http.RoundTripper {
4871
return &replayRoundTripper{
4972
inner: inner,
50-
gets: make(map[string]*http.Response),
51-
puts: make(map[string]*http.Response),
73+
gets: make(map[string]*replayResponse),
74+
puts: make(map[string]*replayResponse),
5275
log: log,
5376
redactor: redactor,
5477
}
@@ -69,6 +92,8 @@ func (replayer *replayRoundTripper) RoundTrip(request *http.Request) (*http.Resp
6992
}
7093

7194
func (replayer *replayRoundTripper) roundTripGet(request *http.Request) (*http.Response, error) {
95+
requestURL := request.URL.RequestURI()
96+
7297
// First use our inner round tripper to get the response.
7398
response, err := replayer.inner.RoundTrip(request)
7499
if err != nil {
@@ -81,20 +106,37 @@ func (replayer *replayRoundTripper) roundTripGet(request *http.Request) (*http.R
81106
defer replayer.padlock.Unlock()
82107

83108
// We didn't find an interaction, see if we have a cached response to return
84-
if cachedResponse, ok := replayer.gets[request.URL.String()]; ok {
85-
replayer.log.Info("Replaying GET request", "url", request.URL.String())
86-
return cachedResponse, nil
109+
if cachedResponse, ok := replayer.gets[requestURL]; ok {
110+
if cachedResponse.remainingReplays > 0 {
111+
cachedResponse.remainingReplays--
112+
replayer.log.Info("Replaying GET request", "url", requestURL)
113+
return cachedResponse.response, nil
114+
}
115+
116+
// It's expired, remove it from the cache to ensure we don't replay it again
117+
delete(replayer.gets, requestURL)
87118
}
88119

89120
// No cached response, return the original response and error
90121
return response, err
91122
}
92123

93-
replayer.padlock.Lock()
94-
defer replayer.padlock.Unlock()
124+
// We have a response; if it has a status, cache only if that represents a terminal state
125+
cacheable := true
126+
if state, ok := replayer.resourceStateFromBody(response); ok {
127+
cacheable = replayer.isTerminalProvisioningState(state)
128+
}
129+
if status, ok := replayer.operationStatusFromBody(response); ok {
130+
cacheable = replayer.isTerminalOperationStatus(status)
131+
}
132+
133+
if cacheable {
134+
replayer.padlock.Lock()
135+
defer replayer.padlock.Unlock()
136+
137+
replayer.gets[requestURL] = newReplayResponse(response, maxGetReplays)
138+
}
95139

96-
// We have a response, cache it and return it
97-
replayer.gets[request.URL.String()] = response
98140
return response, nil
99141
}
100142

@@ -115,10 +157,15 @@ func (replayer *replayRoundTripper) roundTripPut(request *http.Request) (*http.R
115157

116158
// We didn't find an interaction, see if we have a cached response to return
117159
if cachedResponse, ok := replayer.puts[hash]; ok {
118-
// Remove it from the cache to ensure we only replay it once
160+
if cachedResponse.remainingReplays > 0 {
161+
replayer.log.Info("Replaying PUT request", "url", request.URL.String(), "hash", hash)
162+
cachedResponse.remainingReplays--
163+
return cachedResponse.response, nil
164+
}
165+
166+
// It's expired, remove it from the cache to ensure we don't replay it again
119167
delete(replayer.puts, hash)
120-
replayer.log.Info("Replaying PUT request", "url", request.URL.String(), "hash", hash)
121-
return cachedResponse, nil
168+
122169
}
123170

124171
// No cached response, return the original response and error
@@ -129,7 +176,7 @@ func (replayer *replayRoundTripper) roundTripPut(request *http.Request) (*http.R
129176
defer replayer.padlock.Unlock()
130177

131178
// We have a response, cache it and return it
132-
replayer.puts[hash] = response
179+
replayer.puts[hash] = newReplayResponse(response, maxPutReplays)
133180
return response, nil
134181
}
135182

@@ -155,3 +202,77 @@ func (replayer *replayRoundTripper) hashOfBody(request *http.Request) string {
155202

156203
return fmt.Sprintf("%x", hash)
157204
}
205+
206+
type operation struct {
207+
Status string `json:"status"`
208+
}
209+
210+
// operationStatusFromBody extracts the operation status from the response body, if present.
211+
func (replayer *replayRoundTripper) operationStatusFromBody(response *http.Response) (string, bool) {
212+
body := replayer.bodyOfResponse(response)
213+
214+
var op operation
215+
if err := json.Unmarshal([]byte(body), &op); err == nil {
216+
if op.Status != "" {
217+
return op.Status, true
218+
}
219+
}
220+
221+
return "", false
222+
}
223+
224+
// isTerminalOperationStatus returns true if the specified status represents a terminal state for a long-running operation
225+
func (replayer *replayRoundTripper) isTerminalOperationStatus(status string) bool {
226+
return !strings.EqualFold(status, "InProgress")
227+
}
228+
229+
type resource struct {
230+
Properties struct {
231+
ProvisioningState string `json:"provisioningState"`
232+
} `json:"properties"`
233+
}
234+
235+
// resourceStateFromBody extracts the provisioning state from the response body, if present.
236+
func (replayer *replayRoundTripper) resourceStateFromBody(response *http.Response) (string, bool) {
237+
body := replayer.bodyOfResponse(response)
238+
239+
// Treat the body as an operation and deserialize it
240+
241+
// Treat the body as a resource and deserialize it
242+
var res resource
243+
if err := json.Unmarshal([]byte(body), &res); err == nil {
244+
if res.Properties.ProvisioningState != "" {
245+
return res.Properties.ProvisioningState, true
246+
}
247+
}
248+
249+
return "", false
250+
}
251+
252+
// isTerminalStatus returns true if the specified status represents a terminal state a resource
253+
func (*replayRoundTripper) isTerminalProvisioningState(status string) bool {
254+
return strings.EqualFold(status, "Succeeded") ||
255+
strings.EqualFold(status, "Failed") ||
256+
strings.EqualFold(status, "Canceled")
257+
}
258+
259+
func (replayer *replayRoundTripper) bodyOfResponse(response *http.Response) string {
260+
// Read all the content of the response body
261+
var body bytes.Buffer
262+
_, err := body.ReadFrom(response.Body)
263+
if err != nil {
264+
// Should never fail
265+
panic(fmt.Sprintf("reading response.Body failed: %s", err))
266+
}
267+
268+
// Reset the body so it can be read again
269+
response.Body = io.NopCloser(&body)
270+
return body.String()
271+
}
272+
273+
func newReplayResponse(resp *http.Response, maxReplays int) *replayResponse {
274+
return &replayResponse{
275+
response: resp,
276+
remainingReplays: maxReplays,
277+
}
278+
}

v2/internal/testcommon/vcr/v3/replay_roundtripper_test.go

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import (
2323
"github.com/Azure/azure-service-operator/v2/internal/testcommon/vcr"
2424
)
2525

26-
func TestReplayRoundTripperRoundTrip_GivenSingleGET_ReturnsMultipleTimes(t *testing.T) {
26+
func TestReplayRoundTripperRoundTrip_GivenSingleGETReturningTerminalState_ReturnsMultipleTimes(t *testing.T) {
2727
t.Parallel()
2828

2929
// Arrange
@@ -35,7 +35,7 @@ func TestReplayRoundTripperRoundTrip_GivenSingleGET_ReturnsMultipleTimes(t *test
3535

3636
resp := &http.Response{
3737
StatusCode: 200,
38-
Body: io.NopCloser(strings.NewReader("GET response goes here")),
38+
Body: io.NopCloser(strings.NewReader(`{"properties":{"provisioningState": "Succeeded"}}`)),
3939
}
4040

4141
fake := vcr.NewFakeRoundTripper()
@@ -47,13 +47,46 @@ func TestReplayRoundTripperRoundTrip_GivenSingleGET_ReturnsMultipleTimes(t *test
4747
replayer := NewReplayRoundTripper(fake, logr.Discard(), redactor)
4848

4949
// Assert - first request works
50-
assertExpectedResponse(t, replayer, req, 200, "GET response goes here")
50+
assertExpectedResponse(t, replayer, req, 200, `{"provisioningState": "Succeeded"`)
5151

5252
// Assert - second request works by replaying the first
53-
assertExpectedResponse(t, replayer, req, 200, "GET response goes here")
53+
assertExpectedResponse(t, replayer, req, 200, `{"provisioningState": "Succeeded"`)
5454

5555
// Assert - third request works by replaying the first
56-
assertExpectedResponse(t, replayer, req, 200, "GET response goes here")
56+
assertExpectedResponse(t, replayer, req, 200, `{"provisioningState": "Succeeded"`)
57+
}
58+
59+
func TestReplayRoundTripperRoundTrip_GivenSingleGETReturningNonterminalState_ReturnsOnce(t *testing.T) {
60+
t.Parallel()
61+
g := NewGomegaWithT(t)
62+
63+
// Arrange
64+
req := &http.Request{
65+
URL: &url.URL{Path: "/foo"},
66+
Method: http.MethodGet,
67+
Body: io.NopCloser(strings.NewReader("GET body goes here")),
68+
}
69+
70+
resp := &http.Response{
71+
StatusCode: 200,
72+
Body: io.NopCloser(strings.NewReader(`{"properties":{"provisioningState": "Deleting"}}`)),
73+
}
74+
75+
fake := vcr.NewFakeRoundTripper()
76+
fake.AddResponse(req, resp)
77+
78+
redactor := vcr.NewRedactor(creds.DummyAzureIDs())
79+
80+
// Act
81+
replayer := NewReplayRoundTripper(fake, logr.Discard(), redactor)
82+
83+
// Assert - first request works
84+
assertExpectedResponse(t, replayer, req, 200, `{"provisioningState": "Deleting"`)
85+
86+
// Assert - second request fails
87+
//nolint:bodyclose // there's no actual body in this response to close
88+
_, err := fake.RoundTrip(req)
89+
g.Expect(err).To(MatchError(ContainSubstring("requested interaction not found")))
5790
}
5891

5992
func TestReplayRoundTripperRoundTrip_GivenSinglePut_ReturnsOnceExtra(t *testing.T) {

v2/internal/testcommon/vcr/v3/test_recorder.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,6 @@ func NewTestRecorder(
8484
return false
8585
}
8686

87-
// verify custom request count header matches, if present
88-
if header := r.Header.Get(CountHeader); header != "" {
89-
interactionHeader := i.Headers.Get(CountHeader)
90-
if header != interactionHeader {
91-
log.Info("Request count header mismatch", CountHeader, header, "interaction", interactionHeader)
92-
return false
93-
}
94-
}
95-
9687
// verify custom body hash header matches, if present
9788
if header := r.Header.Get(HashHeader); header != "" {
9889
interactionHeader := i.Headers.Get(HashHeader)

v2/internal/testcommon/verify.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ package testcommon
77

88
import (
99
"context"
10+
"strings"
1011

12+
"github.com/onsi/gomega"
1113
"github.com/rotisserie/eris"
1214
apierrors "k8s.io/apimachinery/pkg/api/errors"
1315
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -71,7 +73,17 @@ func (e *Verify) HasState(
7173

7274
inGoalState := ready.Status == desiredState && ready.Severity == desiredSeverity
7375
hasObservedGenerationChanged := oldGeneration != ready.ObservedGeneration
74-
return inGoalState && hasObservedGenerationChanged, nil
76+
if inGoalState && hasObservedGenerationChanged {
77+
return true, nil
78+
}
79+
80+
// If we ever get an error about a missing recording, the test has failed and we
81+
// can abort early without waiting for a timeout
82+
if strings.Contains(ready.Message, "cannot find go-vcr recording") {
83+
return false, gomega.StopTrying(ready.Message)
84+
}
85+
86+
return false, nil
7587
}
7688

7789
// Deleted verifies that the object specified has been deleted
@@ -88,6 +100,25 @@ func (e *Verify) Deleted(ctx context.Context, obj client.Object) (bool, error) {
88100
return false, err
89101
}
90102

103+
// Object hasn't been deleted, check its Condition for a terminal problem
104+
conditioner, ok := obj.(conditions.Conditioner)
105+
if !ok {
106+
return false, eris.Errorf("result of get was not conditions.Conditioner, was: %T", obj)
107+
}
108+
109+
ready, ok := conditions.GetCondition(conditioner, conditions.ConditionTypeReady)
110+
if !ok {
111+
return false, nil
112+
}
113+
114+
// If we ever get an error about a missing recording, the test has failed and we
115+
// can abort early without waiting for a timeout
116+
if ready.Reason == "DeletionNotSupportedInAzure" {
117+
if strings.Contains(ready.Message, "cannot find go-vcr recording") {
118+
return false, gomega.StopTrying(ready.Message)
119+
}
120+
}
121+
91122
return false, nil
92123
}
93124

0 commit comments

Comments
 (0)