Skip to content

Commit 3c18726

Browse files
committed
add retry mechanism
1 parent 1d42eb5 commit 3c18726

File tree

4 files changed

+106
-45
lines changed

4 files changed

+106
-45
lines changed

cluster-autoscaler/cloudprovider/kamatera/kamatera_api_client_rest.go

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@ const (
3232
// NewKamateraApiClientRest is a factory that creates a new REST API client struct
3333
func NewKamateraApiClientRest(clientId string, secret string, url string) (client KamateraApiClientRest) {
3434
return KamateraApiClientRest{
35-
userAgent: userAgent,
36-
clientId: clientId,
37-
secret: secret,
38-
url: url,
35+
userAgent: userAgent,
36+
clientId: clientId,
37+
secret: secret,
38+
url: url,
39+
maxRetries: 5,
40+
expSecondsBetweenRetries: 1,
3941
}
4042
}
4143

@@ -75,10 +77,12 @@ type KamateraServerCreatePostRequest struct {
7577

7678
// KamateraApiClientRest is the struct to perform API calls
7779
type KamateraApiClientRest struct {
78-
userAgent string
79-
clientId string
80-
secret string
81-
url string
80+
userAgent string
81+
clientId string
82+
secret string
83+
url string
84+
maxRetries int
85+
expSecondsBetweenRetries int
8286
}
8387

8488
// ListServers returns a list of all servers in the relevant account and fetches their tags
@@ -89,6 +93,8 @@ func (c *KamateraApiClientRest) ListServers(ctx context.Context, instances map[s
8993
"GET",
9094
"/service/servers",
9195
nil,
96+
c.maxRetries,
97+
c.expSecondsBetweenRetries,
9298
)
9399
if err != nil {
94100
return nil, err
@@ -121,16 +127,20 @@ func (c *KamateraApiClientRest) DeleteServer(ctx context.Context, name string) e
121127
"POST",
122128
"/service/server/poweroff",
123129
KamateraServerPostRequest{ServerName: name},
130+
c.maxRetries,
131+
c.expSecondsBetweenRetries,
124132
)
125133
if err == nil {
126134
commandId := res.([]interface{})[0].(string)
127135
_, err = waitCommand(
128136
ctx,
129137
ProviderConfig{ApiUrl: c.url, ApiClientID: c.clientId, ApiSecret: c.secret},
130138
commandId,
139+
c.maxRetries,
140+
c.expSecondsBetweenRetries,
131141
)
132142
if err != nil {
133-
return err
143+
klog.V(1).Infof("Failed to validate server power off but will attempt to terminate anyway %s: %v", name, err)
134144
}
135145
} else {
136146
klog.V(1).Infof("Failed to power off server but will attempt to terminate anyway %s: %v", name, err)
@@ -141,6 +151,8 @@ func (c *KamateraApiClientRest) DeleteServer(ctx context.Context, name string) e
141151
"POST",
142152
"/service/server/terminate",
143153
KamateraServerTerminatePostRequest{ServerName: name, Force: true},
154+
c.maxRetries,
155+
c.expSecondsBetweenRetries,
144156
)
145157
if err != nil {
146158
return err
@@ -195,6 +207,8 @@ func (c *KamateraApiClientRest) CreateServers(ctx context.Context, count int, co
195207
UserdataFile: config.UserdataFile,
196208
Tag: Tag,
197209
},
210+
1,
211+
0,
198212
)
199213
if err != nil {
200214
return nil, err
@@ -211,6 +225,8 @@ func (c *KamateraApiClientRest) CreateServers(ctx context.Context, count int, co
211225
ctx,
212226
ProviderConfig{ApiUrl: c.url, ApiClientID: c.clientId, ApiSecret: c.secret},
213227
serverNameCommandIds,
228+
c.maxRetries,
229+
c.expSecondsBetweenRetries,
214230
)
215231
if err != nil {
216232
return nil, err
@@ -233,6 +249,8 @@ func (c *KamateraApiClientRest) getServerTags(ctx context.Context, serverName st
233249
"POST",
234250
"/server/tags",
235251
KamateraServerPostRequest{ServerName: serverName},
252+
c.maxRetries,
253+
c.expSecondsBetweenRetries,
236254
)
237255
if err != nil {
238256
return nil, err

cluster-autoscaler/cloudprovider/kamatera/kamatera_api_client_rest_test.go

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,22 @@ const (
3131
mockKamateraSecret = "mock-secret"
3232
)
3333

34+
func NewMockKamateraApiClientRest(url string, maxRetries int, expSecondsBetweenRetries int) (client KamateraApiClientRest) {
35+
return KamateraApiClientRest{
36+
userAgent: userAgent,
37+
clientId: mockKamateraClientId,
38+
secret: mockKamateraSecret,
39+
url: url,
40+
maxRetries: maxRetries,
41+
expSecondsBetweenRetries: expSecondsBetweenRetries,
42+
}
43+
}
44+
3445
func TestApiClientRest_ListServers_NoServers(t *testing.T) {
3546
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse)
3647
defer server.Close()
3748
ctx := context.Background()
38-
client := NewKamateraApiClientRest(mockKamateraClientId, mockKamateraSecret, server.URL)
49+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
3950
server.On("handle", "/service/servers").Return(
4051
"application/json",
4152
`[]`,
@@ -50,7 +61,7 @@ func TestApiClientRest_ListServers(t *testing.T) {
5061
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse)
5162
defer server.Close()
5263
ctx := context.Background()
53-
client := NewKamateraApiClientRest(mockKamateraClientId, mockKamateraSecret, server.URL)
64+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
5465
newServerName1 := mockKamateraServerName()
5566
cachedServerName2 := mockKamateraServerName()
5667
cachedServerName3 := mockKamateraServerName()
@@ -118,7 +129,7 @@ func TestApiClientRest_ListServersNamePrefix(t *testing.T) {
118129
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse)
119130
defer server.Close()
120131
ctx := context.Background()
121-
client := NewKamateraApiClientRest(mockKamateraClientId, mockKamateraSecret, server.URL)
132+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
122133
newServerName1 := "prefixa" + mockKamateraServerName()
123134
newServerName2 := "prefixb" + mockKamateraServerName()
124135
server.On("handle", "/service/servers").Return(
@@ -148,7 +159,7 @@ func TestApiClientRest_ListServersNoTags(t *testing.T) {
148159
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse)
149160
defer server.Close()
150161
ctx := context.Background()
151-
client := NewKamateraApiClientRest(mockKamateraClientId, mockKamateraSecret, server.URL)
162+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
152163
newServerName1 := mockKamateraServerName()
153164
server.On("handle", "/service/servers").Return(
154165
"application/json", fmt.Sprintf(`[{"name": "%s", "power": "on"}]`, newServerName1),
@@ -172,7 +183,7 @@ func TestApiClientRest_ListServersTagsError(t *testing.T) {
172183
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse, MockFieldStatusCode)
173184
defer server.Close()
174185
ctx := context.Background()
175-
client := NewKamateraApiClientRest(mockKamateraClientId, mockKamateraSecret, server.URL)
186+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
176187
newServerName1 := mockKamateraServerName()
177188
server.On("handle", "/service/servers").Return(
178189
"application/json", fmt.Sprintf(`[{"name": "%s", "power": "on"}]`, newServerName1), 200,
@@ -188,7 +199,7 @@ func TestApiClientRest_DeleteServer(t *testing.T) {
188199
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse)
189200
defer server.Close()
190201
ctx := context.Background()
191-
client := NewKamateraApiClientRest(mockKamateraClientId, mockKamateraSecret, server.URL)
202+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
192203
serverName := mockKamateraServerName()
193204
commandId := "mock-command-id"
194205
server.On("handle", "/service/server/poweroff").Return(
@@ -206,11 +217,31 @@ func TestApiClientRest_DeleteServer(t *testing.T) {
206217
mock.AssertExpectationsForObjects(t, server)
207218
}
208219

220+
func TestApiClientRest_DeleteServer_TerminateError(t *testing.T) {
221+
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse, MockFieldStatusCode)
222+
defer server.Close()
223+
ctx := context.Background()
224+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
225+
serverName := mockKamateraServerName()
226+
commandId := "mock-command-id"
227+
server.On("handle", "/service/server/poweroff").Return(
228+
"application/json", fmt.Sprintf(`["%s"]`, commandId), 200,
229+
).Once().On("handle", "/service/queue").Return(
230+
"application/json", `[{"status": "complete"}]`, 200,
231+
).Once().On("handle", "/service/server/terminate").Return(
232+
"application/json",
233+
"Gateway Timeout",
234+
504,
235+
).Times(5)
236+
err := client.DeleteServer(ctx, serverName)
237+
assert.Error(t, err)
238+
}
239+
209240
func TestApiClientRest_CreateServers(t *testing.T) {
210241
server := NewHttpServerMock(MockFieldContentType, MockFieldResponse)
211242
defer server.Close()
212243
ctx := context.Background()
213-
client := NewKamateraApiClientRest(mockKamateraClientId, mockKamateraSecret, server.URL)
244+
client := NewMockKamateraApiClientRest(server.URL, 5, 0)
214245
commandId := "command"
215246
server.On("handle", "/service/server").Return(
216247
"application/json",

cluster-autoscaler/cloudprovider/kamatera/kamatera_node_group_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ func TestNodeGroup_DeleteNodes(t *testing.T) {
159159
assert.Error(t, err)
160160
assert.Contains(t, err.Error(), "cannot find this node in the node group")
161161

162-
// test error on deleting a node when the linode API call fails
162+
// test error on deleting a node when the API call fails
163163
client.On(
164164
"DeleteServer", ctx, serverName4,
165165
).Return(fmt.Errorf("error on API call")).Once()

cluster-autoscaler/cloudprovider/kamatera/kamatera_request.go

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ type ProviderConfig struct {
4141
ApiSecret string
4242
}
4343

44-
func request(ctx context.Context, provider ProviderConfig, method string, path string, body interface{}) (interface{}, error) {
44+
func request(ctx context.Context, provider ProviderConfig, method string, path string, body interface{}, numRetries int, secondsBetweenRetries int) (interface{}, error) {
4545
buf := new(bytes.Buffer)
4646
if body != nil {
4747
if err := json.NewEncoder(buf).Encode(body); err != nil {
@@ -50,36 +50,48 @@ func request(ctx context.Context, provider ProviderConfig, method string, path s
5050
}
5151
path = strings.TrimPrefix(path, "/")
5252
url := fmt.Sprintf("%s/%s", provider.ApiUrl, path)
53-
klog.V(2).Infof("kamatera request: %s %s %s", method, url, buf.String())
54-
req, err := http.NewRequestWithContext(ctx, method, fmt.Sprintf("%s/%s", provider.ApiUrl, path), buf)
55-
if err != nil {
56-
return nil, err
57-
}
58-
req.Header.Add("AuthClientId", provider.ApiClientID)
59-
req.Header.Add("AuthSecret", provider.ApiSecret)
60-
req.Header.Add("Accept", "application/json")
61-
req.Header.Add("Content-Type", "application/json")
62-
res, err := http.DefaultClient.Do(req)
63-
if err != nil {
64-
return nil, err
65-
}
66-
defer res.Body.Close()
67-
6853
var result interface{}
69-
err = json.NewDecoder(res.Body).Decode(&result)
70-
if err != nil {
54+
var err error
55+
for attempt := 0; attempt < numRetries; attempt++ {
56+
klog.V(2).Infof("kamatera request: %s %s %s", method, url, buf.String())
57+
if attempt > 0 {
58+
klog.V(2).Infof("kamatera request retry %d", attempt)
59+
time.Sleep(time.Duration(secondsBetweenRetries<<attempt) * time.Second)
60+
}
61+
req, e := http.NewRequestWithContext(ctx, method, fmt.Sprintf("%s/%s", provider.ApiUrl, path), buf)
62+
if e != nil {
63+
err = e
64+
continue
65+
}
66+
req.Header.Add("AuthClientId", provider.ApiClientID)
67+
req.Header.Add("AuthSecret", provider.ApiSecret)
68+
req.Header.Add("Accept", "application/json")
69+
req.Header.Add("Content-Type", "application/json")
70+
res, e := http.DefaultClient.Do(req)
71+
if e != nil {
72+
err = e
73+
continue
74+
}
75+
defer res.Body.Close()
76+
e = json.NewDecoder(res.Body).Decode(&result)
77+
if e != nil {
78+
if res.StatusCode != 200 {
79+
err = fmt.Errorf("bad status code from Kamatera API: %d", res.StatusCode)
80+
} else {
81+
err = fmt.Errorf("invalid response from Kamatera API: %+v", result)
82+
}
83+
continue
84+
}
7185
if res.StatusCode != 200 {
72-
return nil, fmt.Errorf("bad status code from Kamatera API: %d", res.StatusCode)
86+
err = fmt.Errorf("error response from Kamatera API (%d): %+v", res.StatusCode, result)
87+
continue
7388
}
74-
return nil, fmt.Errorf("invalid response from Kamatera API: %+v", result)
75-
}
76-
if res.StatusCode != 200 {
77-
return nil, fmt.Errorf("error response from Kamatera API (%d): %+v", res.StatusCode, result)
89+
break
7890
}
79-
return result, nil
91+
return result, err
8092
}
8193

82-
func waitCommand(ctx context.Context, provider ProviderConfig, commandID string) (map[string]interface{}, error) {
94+
func waitCommand(ctx context.Context, provider ProviderConfig, commandID string, numRetries int, secondsBetweenRetries int) (map[string]interface{}, error) {
8395
startTime := time.Now()
8496
time.Sleep(2 * time.Second)
8597

@@ -90,7 +102,7 @@ func waitCommand(ctx context.Context, provider ProviderConfig, commandID string)
90102

91103
time.Sleep(2 * time.Second)
92104

93-
result, e := request(ctx, provider, "GET", fmt.Sprintf("/service/queue?id=%s", commandID), nil)
105+
result, e := request(ctx, provider, "GET", fmt.Sprintf("/service/queue?id=%s", commandID), nil, numRetries, secondsBetweenRetries)
94106
if e != nil {
95107
return nil, e
96108
}
@@ -117,7 +129,7 @@ func waitCommand(ctx context.Context, provider ProviderConfig, commandID string)
117129
}
118130
}
119131

120-
func waitCommands(ctx context.Context, provider ProviderConfig, commandIds map[string]string) (map[string]interface{}, error) {
132+
func waitCommands(ctx context.Context, provider ProviderConfig, commandIds map[string]string, numRetries int, secondsBetweenRetries int) (map[string]interface{}, error) {
121133
startTime := time.Now()
122134
time.Sleep(2 * time.Second)
123135

@@ -136,7 +148,7 @@ func waitCommands(ctx context.Context, provider ProviderConfig, commandIds map[s
136148
for id, result := range commandIdsResults {
137149
if result == nil {
138150
commandId := commandIds[id]
139-
result, e := request(ctx, provider, "GET", fmt.Sprintf("/service/queue?id=%s", commandId), nil)
151+
result, e := request(ctx, provider, "GET", fmt.Sprintf("/service/queue?id=%s", commandId), nil, numRetries, secondsBetweenRetries)
140152
if e != nil {
141153
return nil, e
142154
}

0 commit comments

Comments
 (0)