@@ -19,7 +19,6 @@ package router
1919import (
2020 "context"
2121 "fmt"
22- "io"
2322 "net/http"
2423 "strings"
2524 "testing"
@@ -456,276 +455,6 @@ func TestModelRouteSubsetShared(t *testing.T, testCtx *routercontext.RouterTestC
456455 })
457456}
458457
459- // TestModelRouteWithRateLimitShared is a shared test function that can be used by both
460- // router and gateway-api test suites. When useGatewayAPI is true, it configures ModelRoute
461- // with ParentRefs to the default Gateway.
462- func TestModelRouteWithRateLimitShared (t * testing.T , testCtx * routercontext.RouterTestContext , testNamespace string , useGatewayApi bool , kthenaNamespace string ) {
463- const (
464- rateLimitWindowSeconds = 60
465- windowResetBuffer = 10 * time .Second
466- inputTokenLimit = 30
467- outputTokenLimit = 100
468- tokensPerRequest = 10
469- )
470- ctx := context .Background ()
471-
472- standardMessage := []utils.ChatMessage {
473- utils .NewChatMessage ("user" , "hello world" ),
474- }
475-
476- // Test 1: Verify input token rate limit enforcement (30 tokens/minute)
477- t .Run ("VerifyInputTokenRateLimitEnforcement" , func (t * testing.T ) {
478- t .Log ("Test 1: Verifying input token rate limit" )
479-
480- modelRoute := utils.LoadYAMLFromFile [networkingv1alpha1.ModelRoute ]("examples/kthena-router/ModelRouteWithRateLimit.yaml" )
481- modelRoute .Namespace = testNamespace
482- setupModelRouteWithGatewayAPI (modelRoute , useGatewayApi , kthenaNamespace )
483-
484- createdModelRoute , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Create (ctx , modelRoute , metav1.CreateOptions {})
485- require .NoError (t , err , "Failed to create ModelRoute" )
486-
487- t .Cleanup (func () {
488- cleanupCtx := context .Background ()
489- if err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Delete (cleanupCtx , createdModelRoute .Name , metav1.DeleteOptions {}); err != nil {
490- t .Logf ("Warning: Failed to delete ModelRoute: %v" , err )
491- }
492- })
493-
494- require .Eventually (t , func () bool {
495- mr , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Get (ctx , createdModelRoute .Name , metav1.GetOptions {})
496- return err == nil && mr != nil
497- }, 2 * time .Minute , 2 * time .Second , "ModelRoute should be created" )
498-
499- // Calculate expected successful requests
500- expectedSuccessfulRequests := inputTokenLimit / tokensPerRequest
501- if expectedSuccessfulRequests == 0 {
502- t .Fatalf ("Invalid test configuration: inputTokenLimit (%d) / tokensPerRequest (%d) = 0" ,
503- inputTokenLimit , tokensPerRequest )
504- }
505-
506- // Send requests until we exhaust the quota
507- for i := 0 ; i < expectedSuccessfulRequests ; i ++ {
508- resp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
509- responseBody , readErr := io .ReadAll (resp .Body )
510- resp .Body .Close ()
511-
512- require .NoError (t , readErr , "Failed to read response body on request %d" , i + 1 )
513- require .Equal (t , http .StatusOK , resp .StatusCode ,
514- "Request %d should succeed (consumed ~%d/%d tokens). Response: %s" ,
515- i + 1 , (i + 1 )* tokensPerRequest , inputTokenLimit , string (responseBody ))
516- t .Logf ("Request %d succeeded (consumed ~%d/%d tokens)" , i + 1 , (i + 1 )* tokensPerRequest , inputTokenLimit )
517- }
518-
519- // Next request should be rate limited (quota exhausted)
520- rateLimitedResp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
521- defer rateLimitedResp .Body .Close ()
522-
523- assert .Equal (t , http .StatusTooManyRequests , rateLimitedResp .StatusCode ,
524- "Request %d should be rate limited" , expectedSuccessfulRequests + 1 )
525-
526- errorBody , err := io .ReadAll (rateLimitedResp .Body )
527- require .NoError (t , err , "Failed to read rate limit error response body" )
528- assert .Contains (t , strings .ToLower (string (errorBody )), "rate limit" ,
529- "Rate limit error response must contain descriptive message" )
530-
531- t .Logf ("Input token rate limit enforced after %d requests" , expectedSuccessfulRequests )
532- })
533-
534- // Test 2 Verify rate limit window accuracy and persistence
535- t .Run ("VerifyRateLimitWindowAccuracy" , func (t * testing.T ) {
536- t .Log ("Test 2: Verifying rate limit window accuracy..." )
537-
538- modelRoute := utils.LoadYAMLFromFile [networkingv1alpha1.ModelRoute ]("examples/kthena-router/ModelRouteWithRateLimit.yaml" )
539- modelRoute .Namespace = testNamespace
540- setupModelRouteWithGatewayAPI (modelRoute , useGatewayApi , kthenaNamespace )
541-
542- createdModelRoute , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Create (ctx , modelRoute , metav1.CreateOptions {})
543- require .NoError (t , err , "Failed to create ModelRoute" )
544-
545- t .Cleanup (func () {
546- cleanupCtx := context .Background ()
547- if err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Delete (cleanupCtx , createdModelRoute .Name , metav1.DeleteOptions {}); err != nil {
548- t .Logf ("Warning: Failed to delete ModelRoute: %v" , err )
549- }
550- })
551-
552- require .Eventually (t , func () bool {
553- mr , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Get (ctx , createdModelRoute .Name , metav1.GetOptions {})
554- return err == nil && mr != nil
555- }, 2 * time .Minute , 2 * time .Second , "ModelRoute should be created" )
556-
557- // Exhaust quota to ensure rate limit is active
558- expectedSuccessfulRequests := inputTokenLimit / tokensPerRequest
559- for i := 0 ; i < expectedSuccessfulRequests ; i ++ {
560- resp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
561- resp .Body .Close ()
562- assert .Equal (t , http .StatusOK , resp .StatusCode , "Request %d should succeed" , i + 1 )
563- }
564-
565- // Verify rate limit is active
566- rateLimitedResp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
567- rateLimitedResp .Body .Close ()
568- assert .Equal (t , http .StatusTooManyRequests , rateLimitedResp .StatusCode ,
569- "Rate limit should be active after exhausting quota" )
570-
571- const halfWindowDuration = 10 * time .Second
572- t .Logf ("Waiting %v (within rate limit window)..." , halfWindowDuration )
573- time .Sleep (halfWindowDuration )
574-
575- midWindowResp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
576- midWindowResp .Body .Close ()
577- assert .Equal (t , http .StatusTooManyRequests , midWindowResp .StatusCode ,
578- "Rate limit should persist within the time window" )
579-
580- // Verify rate limit resets after window expiration (65 seconds > 60 seconds)
581- remainingWindowDuration := (rateLimitWindowSeconds * time .Second ) - halfWindowDuration + windowResetBuffer
582- t .Logf ("Waiting additional %v for window reset (total: %v)..." ,
583- remainingWindowDuration , halfWindowDuration + remainingWindowDuration )
584- time .Sleep (remainingWindowDuration )
585-
586- postWindowResp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
587- postWindowResp .Body .Close ()
588- assert .Equal (t , http .StatusOK , postWindowResp .StatusCode ,
589- "Request should succeed after rate limit window expires" )
590-
591- t .Log (" Rate limit window accuracy verified" )
592- })
593-
594- // Test 3: Verify rate limit reset mechanism
595- t .Run ("VerifyRateLimitResetMechanism" , func (t * testing.T ) {
596- t .Log ("Test 3: Verifying rate limit reset mechanism..." )
597-
598- modelRoute := utils.LoadYAMLFromFile [networkingv1alpha1.ModelRoute ]("examples/kthena-router/ModelRouteWithRateLimit.yaml" )
599- modelRoute .Namespace = testNamespace
600- setupModelRouteWithGatewayAPI (modelRoute , useGatewayApi , kthenaNamespace )
601-
602- createdModelRoute , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Create (ctx , modelRoute , metav1.CreateOptions {})
603- require .NoError (t , err , "Failed to create ModelRoute" )
604-
605- t .Cleanup (func () {
606- cleanupCtx := context .Background ()
607- if err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Delete (cleanupCtx , createdModelRoute .Name , metav1.DeleteOptions {}); err != nil {
608- t .Logf ("Warning: Failed to delete ModelRoute: %v" , err )
609- }
610- })
611-
612- require .Eventually (t , func () bool {
613- mr , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Get (ctx , createdModelRoute .Name , metav1.GetOptions {})
614- return err == nil && mr != nil
615- }, 2 * time .Minute , 2 * time .Second , "ModelRoute should be created" )
616-
617- // Consume the quota
618- expectedSuccessfulRequests := inputTokenLimit / tokensPerRequest
619- for i := 0 ; i < expectedSuccessfulRequests ; i ++ {
620- resp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
621- resp .Body .Close ()
622- assert .Equal (t , http .StatusOK , resp .StatusCode ,
623- "Request %d should succeed" , i + 1 )
624- }
625-
626- // Confirm rate limiting is active
627- preResetResp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
628- preResetResp .Body .Close ()
629- assert .Equal (t , http .StatusTooManyRequests , preResetResp .StatusCode ,
630- "Rate limit should be active before window reset" )
631-
632- // Wait for complete window reset
633- windowResetDuration := (rateLimitWindowSeconds * time .Second ) + windowResetBuffer
634- t .Logf ("Waiting %v for complete rate limit window reset..." , windowResetDuration )
635- time .Sleep (windowResetDuration )
636-
637- // Verify quota is restored after reset (should allow 2 requests again)
638- for i := 0 ; i < expectedSuccessfulRequests ; i ++ {
639- resp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
640- resp .Body .Close ()
641- assert .Equal (t , http .StatusOK , resp .StatusCode ,
642- "Request %d should succeed after reset" , i + 1 )
643- }
644-
645- // Verify rate limiting kicks in again after consuming quota
646- postResetRateLimitedResp := utils .SendChatRequest (t , createdModelRoute .Spec .ModelName , standardMessage )
647- postResetRateLimitedResp .Body .Close ()
648- assert .Equal (t , http .StatusTooManyRequests , postResetRateLimitedResp .StatusCode ,
649- "Rate limit should be active again after consuming quota" )
650-
651- t .Logf ("Rate limit reset mechanism verified (quota restored: %d requests)" , expectedSuccessfulRequests )
652- })
653-
654- // Test 4: Verify output token rate limit enforcement
655- t .Run ("VerifyOutputTokenRateLimitEnforcement" , func (t * testing.T ) {
656- t .Log ("Test 4: Verifying output token rate limit (100 tokens/minute)..." )
657-
658- modelRoute := utils.LoadYAMLFromFile [networkingv1alpha1.ModelRoute ]("examples/kthena-router/ModelRouteWithRateLimit.yaml" )
659- modelRoute .Namespace = testNamespace
660- setupModelRouteWithGatewayAPI (modelRoute , useGatewayApi , kthenaNamespace )
661-
662- createdModelRoute , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Create (ctx , modelRoute , metav1.CreateOptions {})
663- require .NoError (t , err , "Failed to create ModelRoute" )
664-
665- t .Cleanup (func () {
666- cleanupCtx := context .Background ()
667- if err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Delete (cleanupCtx , createdModelRoute .Name , metav1.DeleteOptions {}); err != nil {
668- t .Logf ("Warning: Failed to delete ModelRoute: %v" , err )
669- }
670- })
671-
672- require .Eventually (t , func () bool {
673- mr , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Get (ctx , createdModelRoute .Name , metav1.GetOptions {})
674- return err == nil && mr != nil
675- }, 2 * time .Minute , 2 * time .Second , "ModelRoute should be created" )
676-
677- // Update ModelRoute to disable input token limit
678- createdModelRoute .Spec .RateLimit .InputTokensPerUnit = nil
679- outputLimit := uint32 (outputTokenLimit )
680- createdModelRoute .Spec .RateLimit .OutputTokensPerUnit = & outputLimit
681-
682- updatedModelRoute , err := testCtx .KthenaClient .NetworkingV1alpha1 ().ModelRoutes (testNamespace ).Update (ctx , createdModelRoute , metav1.UpdateOptions {})
683- require .NoError (t , err , "Failed to update ModelRoute" )
684-
685- // Wait for update to propagate
686- time .Sleep (2 * time .Second )
687-
688- longerPrompt := []utils.ChatMessage {
689- utils .NewChatMessage ("user" , "Write a detailed explanation of rate limiting" ),
690- }
691-
692- // Send requests until we hit the output token limit
693- var successfulRequests int
694- var totalResponseSize int
695- var rateLimited bool
696-
697- for attempt := 0 ; attempt < 20 ; attempt ++ {
698- resp := utils .SendChatRequest (t , updatedModelRoute .Spec .ModelName , longerPrompt )
699- responseBody , readErr := io .ReadAll (resp .Body )
700- resp .Body .Close ()
701-
702- require .NoError (t , readErr , "Failed to read response body" )
703-
704- if resp .StatusCode == http .StatusOK {
705- successfulRequests ++
706- totalResponseSize += len (responseBody )
707- t .Logf ("Request %d succeeded, response size: %d bytes (total: %d bytes)" ,
708- attempt + 1 , len (responseBody ), totalResponseSize )
709- } else if resp .StatusCode == http .StatusTooManyRequests {
710- t .Logf ("Output rate limited after %d requests" , successfulRequests )
711- assert .Contains (t , strings .ToLower (string (responseBody )), "rate limit" ,
712- "Output rate limit error should mention rate limit" )
713- rateLimited = true
714- break
715- } else {
716- t .Fatalf ("Unexpected HTTP status code %d on attempt %d" , resp .StatusCode , attempt + 1 )
717- }
718- }
719-
720- // Verify output rate limiting was enforced
721- assert .True (t , rateLimited , "Expected output rate limiting to be enforced" )
722- assert .Greater (t , successfulRequests , 0 ,
723- "Expected at least one successful request before output rate limiting" )
724-
725- t .Logf (" Output token rate limit enforced after %d requests" , successfulRequests )
726- })
727- }
728-
729458// TestModelRouteWithGlobalRateLimitShared is a shared test function that can be used by both
730459// router and gateway-api test suites. When useGatewayAPI is true, it configures ModelRoute
731460// with ParentRefs to the default Gateway.
0 commit comments