Skip to content

Commit 2380de9

Browse files
dont make junior nerd
Signed-off-by: katara-Jayprakash <[email protected]>
1 parent 06e7c30 commit 2380de9

File tree

3 files changed

+5
-275
lines changed

3 files changed

+5
-275
lines changed

examples/kthena-router/ModelRouteWithGlobalRateLimit.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@ spec:
99
- name: "default"
1010
targetModels:
1111
- modelServerName: "deepseek-r1-1-5b"
12-
# This configuration applies to all rules in this ModelRoute
13-
# - 10 input tokens per minute to be convenient to test
1412
rateLimit:
15-
inputTokensPerUnit: 10
16-
outputTokensPerUnit: 5000
13+
inputTokensPerUnit: 30
14+
outputTokensPerUnit: 100
1715
unit: minute
1816
global:
1917
redis:

test/e2e/router/e2e_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,6 @@ func TestModelRoutePrefillDecodeDisaggregation(t *testing.T) {
121121
func TestModelRouteSubset(t *testing.T) {
122122
TestModelRouteSubsetShared(t, testCtx, testNamespace, false, "")
123123
}
124+
func TestModelRouteWithGlobalRateLimit(t *testing.T) {
125+
TestModelRouteWithGlobalRateLimitShared(t, testCtx, testNamespace, false, "")
126+
}

test/e2e/router/shared.go

Lines changed: 0 additions & 271 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package router
1919
import (
2020
"context"
2121
"fmt"
22-
"io"
2322
"net/http"
2423
"strings"
2524
"testing"
@@ -456,276 +455,6 @@ func TestModelRouteSubsetShared(t *testing.T, testCtx *routercontext.RouterTestC
456455
})
457456
}
458457

459-
// TestModelRouteWithRateLimitShared is a shared test function that can be used by both
460-
// router and gateway-api test suites. When useGatewayApi is true, it configures ModelRoute
461-
// with ParentRefs to the default Gateway.
462-
func TestModelRouteWithRateLimitShared(t *testing.T, testCtx *routercontext.RouterTestContext, testNamespace string, useGatewayApi bool, kthenaNamespace string) {
463-
const (
464-
rateLimitWindowSeconds = 60
465-
windowResetBuffer = 10 * time.Second
466-
inputTokenLimit = 30
467-
outputTokenLimit = 100
468-
tokensPerRequest = 10
469-
)
470-
ctx := context.Background()
471-
472-
standardMessage := []utils.ChatMessage{
473-
utils.NewChatMessage("user", "hello world"),
474-
}
475-
476-
// Test 1: Verify input token rate limit enforcement (30 tokens/minute)
477-
t.Run("VerifyInputTokenRateLimitEnforcement", func(t *testing.T) {
478-
t.Log("Test 1: Verifying input token rate limit")
479-
480-
modelRoute := utils.LoadYAMLFromFile[networkingv1alpha1.ModelRoute]("examples/kthena-router/ModelRouteWithRateLimit.yaml")
481-
modelRoute.Namespace = testNamespace
482-
setupModelRouteWithGatewayAPI(modelRoute, useGatewayApi, kthenaNamespace)
483-
484-
createdModelRoute, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Create(ctx, modelRoute, metav1.CreateOptions{})
485-
require.NoError(t, err, "Failed to create ModelRoute")
486-
487-
t.Cleanup(func() {
488-
cleanupCtx := context.Background()
489-
if err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Delete(cleanupCtx, createdModelRoute.Name, metav1.DeleteOptions{}); err != nil {
490-
t.Logf("Warning: Failed to delete ModelRoute: %v", err)
491-
}
492-
})
493-
494-
require.Eventually(t, func() bool {
495-
mr, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Get(ctx, createdModelRoute.Name, metav1.GetOptions{})
496-
return err == nil && mr != nil
497-
}, 2*time.Minute, 2*time.Second, "ModelRoute should be created")
498-
499-
// Calculate expected successful requests
500-
expectedSuccessfulRequests := inputTokenLimit / tokensPerRequest
501-
if expectedSuccessfulRequests == 0 {
502-
t.Fatalf("Invalid test configuration: inputTokenLimit (%d) / tokensPerRequest (%d) = 0",
503-
inputTokenLimit, tokensPerRequest)
504-
}
505-
506-
// Send requests until we exhaust the quota
507-
for i := 0; i < expectedSuccessfulRequests; i++ {
508-
resp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
509-
responseBody, readErr := io.ReadAll(resp.Body)
510-
resp.Body.Close()
511-
512-
require.NoError(t, readErr, "Failed to read response body on request %d", i+1)
513-
require.Equal(t, http.StatusOK, resp.StatusCode,
514-
"Request %d should succeed (consumed ~%d/%d tokens). Response: %s",
515-
i+1, (i+1)*tokensPerRequest, inputTokenLimit, string(responseBody))
516-
t.Logf("Request %d succeeded (consumed ~%d/%d tokens)", i+1, (i+1)*tokensPerRequest, inputTokenLimit)
517-
}
518-
519-
// Next request should be rate limited (quota exhausted)
520-
rateLimitedResp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
521-
defer rateLimitedResp.Body.Close()
522-
523-
assert.Equal(t, http.StatusTooManyRequests, rateLimitedResp.StatusCode,
524-
"Request %d should be rate limited", expectedSuccessfulRequests+1)
525-
526-
errorBody, err := io.ReadAll(rateLimitedResp.Body)
527-
require.NoError(t, err, "Failed to read rate limit error response body")
528-
assert.Contains(t, strings.ToLower(string(errorBody)), "rate limit",
529-
"Rate limit error response must contain descriptive message")
530-
531-
t.Logf("Input token rate limit enforced after %d requests", expectedSuccessfulRequests)
532-
})
533-
534-
// Test 2: Verify rate limit window accuracy and persistence
535-
t.Run("VerifyRateLimitWindowAccuracy", func(t *testing.T) {
536-
t.Log("Test 2: Verifying rate limit window accuracy...")
537-
538-
modelRoute := utils.LoadYAMLFromFile[networkingv1alpha1.ModelRoute]("examples/kthena-router/ModelRouteWithRateLimit.yaml")
539-
modelRoute.Namespace = testNamespace
540-
setupModelRouteWithGatewayAPI(modelRoute, useGatewayApi, kthenaNamespace)
541-
542-
createdModelRoute, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Create(ctx, modelRoute, metav1.CreateOptions{})
543-
require.NoError(t, err, "Failed to create ModelRoute")
544-
545-
t.Cleanup(func() {
546-
cleanupCtx := context.Background()
547-
if err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Delete(cleanupCtx, createdModelRoute.Name, metav1.DeleteOptions{}); err != nil {
548-
t.Logf("Warning: Failed to delete ModelRoute: %v", err)
549-
}
550-
})
551-
552-
require.Eventually(t, func() bool {
553-
mr, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Get(ctx, createdModelRoute.Name, metav1.GetOptions{})
554-
return err == nil && mr != nil
555-
}, 2*time.Minute, 2*time.Second, "ModelRoute should be created")
556-
557-
// Exhaust quota to ensure rate limit is active
558-
expectedSuccessfulRequests := inputTokenLimit / tokensPerRequest
559-
for i := 0; i < expectedSuccessfulRequests; i++ {
560-
resp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
561-
resp.Body.Close()
562-
assert.Equal(t, http.StatusOK, resp.StatusCode, "Request %d should succeed", i+1)
563-
}
564-
565-
// Verify rate limit is active
566-
rateLimitedResp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
567-
rateLimitedResp.Body.Close()
568-
assert.Equal(t, http.StatusTooManyRequests, rateLimitedResp.StatusCode,
569-
"Rate limit should be active after exhausting quota")
570-
571-
const halfWindowDuration = 10 * time.Second
572-
t.Logf("Waiting %v (within rate limit window)...", halfWindowDuration)
573-
time.Sleep(halfWindowDuration)
574-
575-
midWindowResp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
576-
midWindowResp.Body.Close()
577-
assert.Equal(t, http.StatusTooManyRequests, midWindowResp.StatusCode,
578-
"Rate limit should persist within the time window")
579-
580-
// Verify rate limit resets after window expiration (70 seconds total wait > 60-second window)
581-
remainingWindowDuration := (rateLimitWindowSeconds * time.Second) - halfWindowDuration + windowResetBuffer
582-
t.Logf("Waiting additional %v for window reset (total: %v)...",
583-
remainingWindowDuration, halfWindowDuration+remainingWindowDuration)
584-
time.Sleep(remainingWindowDuration)
585-
586-
postWindowResp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
587-
postWindowResp.Body.Close()
588-
assert.Equal(t, http.StatusOK, postWindowResp.StatusCode,
589-
"Request should succeed after rate limit window expires")
590-
591-
t.Log(" Rate limit window accuracy verified")
592-
})
593-
594-
// Test 3: Verify rate limit reset mechanism
595-
t.Run("VerifyRateLimitResetMechanism", func(t *testing.T) {
596-
t.Log("Test 3: Verifying rate limit reset mechanism...")
597-
598-
modelRoute := utils.LoadYAMLFromFile[networkingv1alpha1.ModelRoute]("examples/kthena-router/ModelRouteWithRateLimit.yaml")
599-
modelRoute.Namespace = testNamespace
600-
setupModelRouteWithGatewayAPI(modelRoute, useGatewayApi, kthenaNamespace)
601-
602-
createdModelRoute, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Create(ctx, modelRoute, metav1.CreateOptions{})
603-
require.NoError(t, err, "Failed to create ModelRoute")
604-
605-
t.Cleanup(func() {
606-
cleanupCtx := context.Background()
607-
if err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Delete(cleanupCtx, createdModelRoute.Name, metav1.DeleteOptions{}); err != nil {
608-
t.Logf("Warning: Failed to delete ModelRoute: %v", err)
609-
}
610-
})
611-
612-
require.Eventually(t, func() bool {
613-
mr, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Get(ctx, createdModelRoute.Name, metav1.GetOptions{})
614-
return err == nil && mr != nil
615-
}, 2*time.Minute, 2*time.Second, "ModelRoute should be created")
616-
617-
// Consume the quota
618-
expectedSuccessfulRequests := inputTokenLimit / tokensPerRequest
619-
for i := 0; i < expectedSuccessfulRequests; i++ {
620-
resp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
621-
resp.Body.Close()
622-
assert.Equal(t, http.StatusOK, resp.StatusCode,
623-
"Request %d should succeed", i+1)
624-
}
625-
626-
// Confirm rate limiting is active
627-
preResetResp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
628-
preResetResp.Body.Close()
629-
assert.Equal(t, http.StatusTooManyRequests, preResetResp.StatusCode,
630-
"Rate limit should be active before window reset")
631-
632-
// Wait for complete window reset
633-
windowResetDuration := (rateLimitWindowSeconds * time.Second) + windowResetBuffer
634-
t.Logf("Waiting %v for complete rate limit window reset...", windowResetDuration)
635-
time.Sleep(windowResetDuration)
636-
637-
// Verify quota is restored after reset (should allow expectedSuccessfulRequests = 3 requests again)
638-
for i := 0; i < expectedSuccessfulRequests; i++ {
639-
resp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
640-
resp.Body.Close()
641-
assert.Equal(t, http.StatusOK, resp.StatusCode,
642-
"Request %d should succeed after reset", i+1)
643-
}
644-
645-
// Verify rate limiting kicks in again after consuming quota
646-
postResetRateLimitedResp := utils.SendChatRequest(t, createdModelRoute.Spec.ModelName, standardMessage)
647-
postResetRateLimitedResp.Body.Close()
648-
assert.Equal(t, http.StatusTooManyRequests, postResetRateLimitedResp.StatusCode,
649-
"Rate limit should be active again after consuming quota")
650-
651-
t.Logf("Rate limit reset mechanism verified (quota restored: %d requests)", expectedSuccessfulRequests)
652-
})
653-
654-
// Test 4: Verify output token rate limit enforcement
655-
t.Run("VerifyOutputTokenRateLimitEnforcement", func(t *testing.T) {
656-
t.Log("Test 4: Verifying output token rate limit (100 tokens/minute)...")
657-
658-
modelRoute := utils.LoadYAMLFromFile[networkingv1alpha1.ModelRoute]("examples/kthena-router/ModelRouteWithRateLimit.yaml")
659-
modelRoute.Namespace = testNamespace
660-
setupModelRouteWithGatewayAPI(modelRoute, useGatewayApi, kthenaNamespace)
661-
662-
createdModelRoute, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Create(ctx, modelRoute, metav1.CreateOptions{})
663-
require.NoError(t, err, "Failed to create ModelRoute")
664-
665-
t.Cleanup(func() {
666-
cleanupCtx := context.Background()
667-
if err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Delete(cleanupCtx, createdModelRoute.Name, metav1.DeleteOptions{}); err != nil {
668-
t.Logf("Warning: Failed to delete ModelRoute: %v", err)
669-
}
670-
})
671-
672-
require.Eventually(t, func() bool {
673-
mr, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Get(ctx, createdModelRoute.Name, metav1.GetOptions{})
674-
return err == nil && mr != nil
675-
}, 2*time.Minute, 2*time.Second, "ModelRoute should be created")
676-
677-
// Update ModelRoute to disable input token limit
678-
createdModelRoute.Spec.RateLimit.InputTokensPerUnit = nil
679-
outputLimit := uint32(outputTokenLimit)
680-
createdModelRoute.Spec.RateLimit.OutputTokensPerUnit = &outputLimit
681-
682-
updatedModelRoute, err := testCtx.KthenaClient.NetworkingV1alpha1().ModelRoutes(testNamespace).Update(ctx, createdModelRoute, metav1.UpdateOptions{})
683-
require.NoError(t, err, "Failed to update ModelRoute")
684-
685-
// Wait for update to propagate
686-
time.Sleep(2 * time.Second)
687-
688-
longerPrompt := []utils.ChatMessage{
689-
utils.NewChatMessage("user", "Write a detailed explanation of rate limiting"),
690-
}
691-
692-
// Send requests until we hit the output token limit
693-
var successfulRequests int
694-
var totalResponseSize int
695-
var rateLimited bool
696-
697-
for attempt := 0; attempt < 20; attempt++ {
698-
resp := utils.SendChatRequest(t, updatedModelRoute.Spec.ModelName, longerPrompt)
699-
responseBody, readErr := io.ReadAll(resp.Body)
700-
resp.Body.Close()
701-
702-
require.NoError(t, readErr, "Failed to read response body")
703-
704-
if resp.StatusCode == http.StatusOK {
705-
successfulRequests++
706-
totalResponseSize += len(responseBody)
707-
t.Logf("Request %d succeeded, response size: %d bytes (total: %d bytes)",
708-
attempt+1, len(responseBody), totalResponseSize)
709-
} else if resp.StatusCode == http.StatusTooManyRequests {
710-
t.Logf("Output rate limited after %d requests", successfulRequests)
711-
assert.Contains(t, strings.ToLower(string(responseBody)), "rate limit",
712-
"Output rate limit error should mention rate limit")
713-
rateLimited = true
714-
break
715-
} else {
716-
t.Fatalf("Unexpected HTTP status code %d on attempt %d", resp.StatusCode, attempt+1)
717-
}
718-
}
719-
720-
// Verify output rate limiting was enforced
721-
assert.True(t, rateLimited, "Expected output rate limiting to be enforced")
722-
assert.Greater(t, successfulRequests, 0,
723-
"Expected at least one successful request before output rate limiting")
724-
725-
t.Logf(" Output token rate limit enforced after %d requests", successfulRequests)
726-
})
727-
}
728-
729458
// TestModelRouteWithGlobalRateLimitShared is a shared test function that can be used by both
730459
// router and gateway-api test suites. When useGatewayAPI is true, it configures ModelRoute
731460
// with ParentRefs to the default Gateway.

0 commit comments

Comments
 (0)