Skip to content

Commit 773848b

Browse files
Return 429 when cant auto resume due to team limit (#2005)
1 parent 1fbce53 commit 773848b

File tree

8 files changed

+335
-16
lines changed

8 files changed

+335
-16
lines changed

packages/api/internal/handlers/proxy_grpc.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ func (s *SandboxService) ResumeSandbox(ctx context.Context, req *proxygrpc.Sandb
195195
nil, // volumeMounts
196196
)
197197
if apiErr != nil {
198-
return nil, status.Errorf(sharedutils.GRPCCodeFromHTTPStatus(apiErr.Code), "resume failed: %s", apiErr.ClientMsg)
198+
return nil, status.Error(sharedutils.GRPCCodeFromHTTPStatus(apiErr.Code), apiErr.ClientMsg)
199199
}
200200

201201
node := s.api.orchestrator.GetNode(sbx.ClusterID, sbx.NodeID)

packages/client-proxy/internal/proxy/proxy.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const (
4040
autoResumeSucceeded autoResumeResult = iota
4141
autoResumeNotAllowed
4242
autoResumePermissionDenied
43+
autoResumeResourceExhausted
4344
autoResumeErrored
4445
)
4546

@@ -95,6 +96,9 @@ func handlePausedSandbox(
9596
if st.Code() == codes.NotFound {
9697
return "", autoResumeNotAllowed, nil
9798
}
99+
if st.Code() == codes.ResourceExhausted {
100+
return "", autoResumeResourceExhausted, reverseproxy.NewErrSandboxResourceExhausted(sandboxId, st.Message())
101+
}
98102
}
99103

100104
return "", autoResumeErrored, err
@@ -139,6 +143,13 @@ func NewClientProxy(meterProvider metric.MeterProvider, serviceName string, port
139143
return nil, resumeDeniedErr
140144
}
141145

146+
var resourceExhaustedErr *reverseproxy.SandboxResourceExhaustedError
147+
if errors.As(err, &resourceExhaustedErr) {
148+
l.Warn(ctx, "sandbox resource exhausted", zap.Error(err))
149+
150+
return nil, resourceExhaustedErr
151+
}
152+
142153
if !errors.Is(err, ErrNodeNotFound) {
143154
l.Warn(ctx, "failed to resolve node ip with Redis resolution", zap.Error(err))
144155
}

packages/client-proxy/internal/proxy/proxy_test.go

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,14 @@ func TestCatalogResolution_CatalogHit(t *testing.T) {
5959
c := catalog.NewMemorySandboxesCatalog()
6060
ff := newFF(t, true)
6161

62-
err := c.StoreSandbox(context.Background(), "sbx", &catalog.SandboxInfo{
62+
err := c.StoreSandbox(t.Context(), "sbx", &catalog.SandboxInfo{
6363
OrchestratorIP: "10.0.0.1",
6464
ExecutionID: "exec",
6565
StartedAt: time.Now(),
6666
}, time.Minute)
6767
require.NoError(t, err)
6868

69-
nodeIP, err := catalogResolution(context.Background(), "sbx", 8000, "", "", c, nil, ff)
69+
nodeIP, err := catalogResolution(t.Context(), "sbx", 8000, "", "", c, nil, ff)
7070
require.NoError(t, err)
7171
require.Equal(t, "10.0.0.1", nodeIP)
7272
}
@@ -77,14 +77,14 @@ func TestCatalogResolution_CatalogHit_EmptyIPReturnsEmpty(t *testing.T) {
7777
c := catalog.NewMemorySandboxesCatalog()
7878
ff := newFF(t, true)
7979

80-
err := c.StoreSandbox(context.Background(), "sbx", &catalog.SandboxInfo{
80+
err := c.StoreSandbox(t.Context(), "sbx", &catalog.SandboxInfo{
8181
OrchestratorIP: "",
8282
ExecutionID: "exec",
8383
StartedAt: time.Now(),
8484
}, time.Minute)
8585
require.NoError(t, err)
8686

87-
nodeIP, err := catalogResolution(context.Background(), "sbx", 8000, "", "", c, nil, ff)
87+
nodeIP, err := catalogResolution(t.Context(), "sbx", 8000, "", "", c, nil, ff)
8888
require.NoError(t, err)
8989
require.Empty(t, nodeIP)
9090
}
@@ -95,7 +95,7 @@ func TestCatalogResolution_CatalogMiss(t *testing.T) {
9595
c := catalog.NewMemorySandboxesCatalog()
9696
ff := newFF(t, true)
9797

98-
_, err := catalogResolution(context.Background(), "sbx", 8000, "", "", c, nil, ff)
98+
_, err := catalogResolution(t.Context(), "sbx", 8000, "", "", c, nil, ff)
9999
require.ErrorIs(t, err, ErrNodeNotFound)
100100
}
101101

@@ -104,7 +104,7 @@ func TestHandlePausedSandbox_NoResumer_MissingTrafficAccessToken(t *testing.T) {
104104

105105
ff := newFF(t, true)
106106

107-
_, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "", "", nil, ff)
107+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "", "", nil, ff)
108108
require.NoError(t, err)
109109
require.Equal(t, autoResumeNotAllowed, res)
110110
}
@@ -114,7 +114,7 @@ func TestHandlePausedSandbox_NoResumer_InvalidTrafficAccessToken(t *testing.T) {
114114

115115
ff := newFF(t, true)
116116

117-
_, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "wrong-token", "", nil, ff)
117+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "wrong-token", "", nil, ff)
118118
require.NoError(t, err)
119119
require.Equal(t, autoResumeNotAllowed, res)
120120
}
@@ -124,7 +124,7 @@ func TestHandlePausedSandbox_FlagDisabled(t *testing.T) {
124124

125125
ff := newFF(t, false)
126126

127-
_, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "token", "", stubResumer{nodeIP: "10.0.0.1"}, ff)
127+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{nodeIP: "10.0.0.1"}, ff)
128128
require.NoError(t, err)
129129
require.Equal(t, autoResumeNotAllowed, res)
130130
}
@@ -134,7 +134,7 @@ func TestHandlePausedSandbox_NotFound(t *testing.T) {
134134

135135
ff := newFF(t, true)
136136

137-
_, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.NotFound, "not allowed")}, ff)
137+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.NotFound, "not allowed")}, ff)
138138
require.NoError(t, err)
139139
require.Equal(t, autoResumeNotAllowed, res)
140140
}
@@ -144,19 +144,33 @@ func TestHandlePausedSandbox_PermissionDenied(t *testing.T) {
144144

145145
ff := newFF(t, true)
146146

147-
_, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.PermissionDenied, "permission denied")}, ff)
147+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.PermissionDenied, "permission denied")}, ff)
148148
require.Error(t, err)
149149
var deniedErr *reverseproxy.SandboxResumePermissionDeniedError
150150
require.ErrorAs(t, err, &deniedErr)
151151
require.Equal(t, autoResumePermissionDenied, res)
152152
}
153153

154+
func TestHandlePausedSandbox_ResourceExhausted(t *testing.T) {
155+
t.Parallel()
156+
157+
ff := newFF(t, true)
158+
159+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.ResourceExhausted, "rate limit hit")}, ff)
160+
require.Error(t, err)
161+
var exhaustedErr *reverseproxy.SandboxResourceExhaustedError
162+
require.ErrorAs(t, err, &exhaustedErr)
163+
require.Equal(t, "sbx", exhaustedErr.SandboxId)
164+
require.Equal(t, "rate limit hit", exhaustedErr.Message)
165+
require.Equal(t, autoResumeResourceExhausted, res)
166+
}
167+
154168
func TestHandlePausedSandbox_SnapshotNotFound(t *testing.T) {
155169
t.Parallel()
156170

157171
ff := newFF(t, true)
158172

159-
_, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.NotFound, "snapshot not found")}, ff)
173+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.NotFound, "snapshot not found")}, ff)
160174
require.NoError(t, err)
161175
require.Equal(t, autoResumeNotAllowed, res)
162176
}
@@ -166,7 +180,7 @@ func TestHandlePausedSandbox_Error(t *testing.T) {
166180

167181
ff := newFF(t, true)
168182

169-
_, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.Unavailable, "boom")}, ff)
183+
_, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{err: status.Error(codes.Unavailable, "boom")}, ff)
170184
require.Error(t, err)
171185
require.Equal(t, autoResumeErrored, res)
172186
}
@@ -176,7 +190,7 @@ func TestHandlePausedSandbox_Succeeded(t *testing.T) {
176190

177191
ff := newFF(t, true)
178192

179-
nodeIP, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "token", "", stubResumer{nodeIP: "10.0.0.1"}, ff)
193+
nodeIP, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{nodeIP: "10.0.0.1"}, ff)
180194
require.NoError(t, err)
181195
require.Equal(t, autoResumeSucceeded, res)
182196
require.Equal(t, "10.0.0.1", nodeIP)
@@ -187,7 +201,7 @@ func TestHandlePausedSandbox_Succeeded_EmptyIP(t *testing.T) {
187201

188202
ff := newFF(t, true)
189203

190-
nodeIP, res, err := handlePausedSandbox(context.Background(), "sbx", 8000, "token", "", stubResumer{nodeIP: ""}, ff)
204+
nodeIP, res, err := handlePausedSandbox(t.Context(), "sbx", 8000, "token", "", stubResumer{nodeIP: ""}, ff)
191205
require.NoError(t, err)
192206
require.Equal(t, autoResumeSucceeded, res)
193207
require.Empty(t, nodeIP)
@@ -199,7 +213,7 @@ func TestHandlePausedSandbox_PassesPortAndTokenToResumer(t *testing.T) {
199213
ff := newFF(t, true)
200214
resumer := &recordingResumer{}
201215

202-
nodeIP, res, err := handlePausedSandbox(context.Background(), "sbx", 49983, "token", "envd-token", resumer, ff)
216+
nodeIP, res, err := handlePausedSandbox(t.Context(), "sbx", 49983, "token", "envd-token", resumer, ff)
203217
require.NoError(t, err)
204218
require.Equal(t, autoResumeSucceeded, res)
205219
require.Equal(t, "10.0.0.1", nodeIP)

packages/shared/pkg/proxy/errors.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,19 @@ func NewErrInvalidTrafficAccessToken(sandboxId string, header string) *InvalidTr
8181
Header: header,
8282
}
8383
}
84+
85+
type SandboxResourceExhaustedError struct {
86+
SandboxId string
87+
Message string
88+
}
89+
90+
func NewErrSandboxResourceExhausted(sandboxId string, message string) *SandboxResourceExhaustedError {
91+
return &SandboxResourceExhaustedError{
92+
SandboxId: sandboxId,
93+
Message: message,
94+
}
95+
}
96+
97+
func (e SandboxResourceExhaustedError) Error() string {
98+
return "sandbox resource exhausted"
99+
}

packages/shared/pkg/proxy/handler.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,25 @@ func handler(p *pool.ProxyPool, getDestination func(r *http.Request) (*pool.Dest
8686
return
8787
}
8888

89+
var resourceExhaustedErr *SandboxResourceExhaustedError
90+
if errors.As(err, &resourceExhaustedErr) {
91+
logger.L().Warn(ctx, "team sandbox limit reached",
92+
zap.String("host", r.Host),
93+
logger.WithSandboxID(resourceExhaustedErr.SandboxId))
94+
95+
err := template.
96+
NewTeamSandboxLimitError(resourceExhaustedErr.SandboxId, r.Host, resourceExhaustedErr.Message).
97+
HandleError(w, r)
98+
if err != nil {
99+
logger.L().Error(ctx, "failed to handle team sandbox limit error", zap.Error(err), logger.WithSandboxID(resourceExhaustedErr.SandboxId))
100+
http.Error(w, "Failed to handle team sandbox limit error", http.StatusInternalServerError)
101+
102+
return
103+
}
104+
105+
return
106+
}
107+
89108
var trafficMissingTokenErr *MissingTrafficAccessTokenError
90109
if errors.As(err, &trafficMissingTokenErr) {
91110
logger.L().Warn(ctx, "traffic access token is missing", zap.String("host", r.Host))

packages/shared/pkg/proxy/proxy_test.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,64 @@ func TestProxyResumePermissionDeniedErrorTemplate(t *testing.T) {
276276
})
277277
}
278278

279+
func TestProxyTeamSandboxLimitError(t *testing.T) {
280+
t.Parallel()
281+
282+
getDestination := func(*http.Request) (*pool.Destination, error) {
283+
return nil, NewErrSandboxResourceExhausted("test-sandbox", "rate limit hit")
284+
}
285+
286+
proxy, port, err := newTestProxy(t, getDestination)
287+
require.NoError(t, err)
288+
t.Cleanup(func() {
289+
proxy.Close()
290+
})
291+
292+
t.Run("json for non-browser", func(t *testing.T) {
293+
t.Parallel()
294+
proxyURL := fmt.Sprintf("http://127.0.0.1:%d/hello", port)
295+
resp, err := httpGet(t, proxyURL)
296+
require.NoError(t, err)
297+
t.Cleanup(func() {
298+
_ = resp.Body.Close()
299+
})
300+
301+
require.Equal(t, http.StatusTooManyRequests, resp.StatusCode)
302+
require.Equal(t, "application/json; charset=utf-8", resp.Header.Get("Content-Type"))
303+
304+
var response struct {
305+
Code int `json:"code"`
306+
Message string `json:"message"`
307+
}
308+
err = json.NewDecoder(resp.Body).Decode(&response)
309+
require.NoError(t, err)
310+
require.Equal(t, http.StatusTooManyRequests, response.Code)
311+
require.Equal(t, "rate limit hit", response.Message)
312+
})
313+
314+
t.Run("html for browser", func(t *testing.T) {
315+
t.Parallel()
316+
proxyURL := fmt.Sprintf("http://127.0.0.1:%d/hello", port)
317+
headers := http.Header{
318+
"User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64)"},
319+
}
320+
321+
resp, err := httpGetWithHeaders(t, proxyURL, headers)
322+
require.NoError(t, err)
323+
t.Cleanup(func() {
324+
_ = resp.Body.Close()
325+
})
326+
327+
require.Equal(t, http.StatusTooManyRequests, resp.StatusCode)
328+
require.Equal(t, "text/html; charset=utf-8", resp.Header.Get("Content-Type"))
329+
330+
body, err := io.ReadAll(resp.Body)
331+
require.NoError(t, err)
332+
assert.Contains(t, string(body), "Sandbox Limit Reached")
333+
assert.Contains(t, string(body), "rate limit hit")
334+
})
335+
}
336+
279337
func httpGet(t *testing.T, proxyURL string) (*http.Response, error) {
280338
t.Helper()
281339

0 commit comments

Comments
 (0)