vllm-project
diff --git a/‎e2e/README.md‎
Lines changed: 12 additions & 0 deletions b/‎e2e/README.md‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎e2e/profiles/ai-gateway/profile.go‎
Lines changed: 1 addition & 0 deletions b/‎e2e/profiles/ai-gateway/profile.go‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎e2e/testcases/chat_completions_progressive_stress.go‎
Lines changed: 239 additions & 0 deletions b/‎e2e/testcases/chat_completions_progressive_stress.go‎
Lines changed: 239 additions & 0 deletions
diff --git a/‎e2e/testcases/chat_completions_stress_request.go‎
Lines changed: 2 additions & 2 deletions b/‎e2e/testcases/chat_completions_stress_request.go‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎e2e/testcases/common.go‎
Lines changed: 90 additions & 0 deletions b/‎e2e/testcases/common.go‎
Lines changed: 90 additions & 0 deletions
@@ -53,6 +53,8 @@ The framework includes the following test cases (all in `e2e/testcases/`):
 | Test Case | Description | Metrics |
 |-----------|-------------|---------|
 | `chat-completions-request` | Basic chat completions API test | Response validation |
+| `chat-completions-stress-request` | Sequential stress test with 1000 requests | Success rate, avg duration |
+| `chat-completions-progressive-stress` | Progressive QPS stress test (10/20/50/100 QPS) | Per-stage success rate, latency stats |
 | `domain-classify` | Domain classification accuracy | 65 cases, accuracy rate |
 | `cache` | Semantic cache hit rate | 5 groups, cache hit rate |
 | `pii-detection` | PII detection and blocking | 10 PII types, detection rate, block rate |
@@ -85,6 +87,16 @@ make e2e-test
 make e2e-test PROFILE=ai-gateway
 ```
 
+### Run specific test cases
+
+```bash
+# Run only the progressive stress test
+./e2e/cmd/e2e/e2e --profile ai-gateway --test-cases chat-completions-progressive-stress --verbose
+
+# Run multiple specific test cases
+./e2e/cmd/e2e/e2e --profile ai-gateway --test-cases chat-completions-request,chat-completions-progressive-stress
+```
+
 ### Run with custom options
 
 ```bash
 
@@ -109,6 +109,7 @@ func (p *Profile) GetTestCases() []string {
 	return []string{
 		"chat-completions-request",
 		"chat-completions-stress-request",
+		"chat-completions-progressive-stress",
 		"domain-classify",
 		"semantic-cache",
 		"pii-detection",
 
@@ -0,0 +1,239 @@
+package testcases
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"sync"
+	"time"
+
+	pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases"
+	"k8s.io/client-go/kubernetes"
+)
+
+func init() {
+	pkgtestcases.Register("chat-completions-progressive-stress", pkgtestcases.TestCase{
+		Description: "Progressive stress test with 10/20/50/100 QPS and success rate tracking",
+		Tags:        []string{"llm", "stress", "progressive", "qps"},
+		Fn:          testProgressiveStress,
+	})
+}
+
+// ProgressiveStageResult tracks results for a single QPS stage
+type ProgressiveStageResult struct {
+	QPS          int
+	TotalReqs    int
+	SuccessCount int
+	FailureCount int
+	SuccessRate  float64
+	AvgDuration  time.Duration
+	MinDuration  time.Duration
+	MaxDuration  time.Duration
+	Results      []StressTestResult
+}
+
+func testProgressiveStress(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error {
+	if opts.Verbose {
+		fmt.Println("[Test] Starting progressive stress test: 10/20/50/100 QPS")
+	}
+
+	// Setup service connection and get local port
+	localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts)
+	if err != nil {
+		return err
+	}
+	defer stopPortForward()
+
+	// Define QPS stages and duration for each stage
+	qpsStages := []int{10, 20, 50, 100}
+	stageDuration := 30 * time.Second // Run each stage for 30 seconds
+
+	var stageResults []ProgressiveStageResult
+
+	// Run each QPS stage
+	for _, qps := range qpsStages {
+		if opts.Verbose {
+			fmt.Printf("\n[Test] Starting stage: %d QPS for %v\n", qps, stageDuration)
+		}
+
+		stageResult := runQPSStage(ctx, qps, stageDuration, localPort)
+		stageResults = append(stageResults, stageResult)
+
+		if opts.Verbose {
+			fmt.Printf("[Test] Stage %d QPS completed: %d/%d successful (%.2f%% success rate)\n",
+				qps, stageResult.SuccessCount, stageResult.TotalReqs, stageResult.SuccessRate)
+		}
+
+		// Brief pause between stages
+		time.Sleep(2 * time.Second)
+	}
+
+	// Print comprehensive summary
+	printProgressiveResults(stageResults)
+
+	// Set details for reporting
+	if opts.SetDetails != nil {
+		details := make(map[string]interface{})
+		for _, stage := range stageResults {
+			stageKey := fmt.Sprintf("qps_%d", stage.QPS)
+			details[stageKey] = map[string]interface{}{
+				"total_requests": stage.TotalReqs,
+				"successful":     stage.SuccessCount,
+				"failed":         stage.FailureCount,
+				"success_rate":   fmt.Sprintf("%.2f%%", stage.SuccessRate),
+				"avg_duration":   stage.AvgDuration.Milliseconds(),
+				"min_duration":   stage.MinDuration.Milliseconds(),
+				"max_duration":   stage.MaxDuration.Milliseconds(),
+			}
+		}
+		opts.SetDetails(details)
+	}
+
+	return nil
+}
+
+func runQPSStage(ctx context.Context, qps int, duration time.Duration, localPort string) ProgressiveStageResult {
+	result := ProgressiveStageResult{
+		QPS:         qps,
+		MinDuration: time.Hour, // Initialize with large value
+	}
+
+	var mu sync.Mutex
+	var wg sync.WaitGroup
+
+	// Calculate interval between requests
+	interval := time.Second / time.Duration(qps)
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	// Context with timeout for this stage
+	stageCtx, cancel := context.WithTimeout(ctx, duration)
+	defer cancel()
+
+	requestID := 0
+
+	// Send requests at the specified QPS rate
+	for {
+		select {
+		case <-stageCtx.Done():
+			// Stage duration completed, wait for all in-flight requests
+			wg.Wait()
+			return calculateStageStats(result)
+
+		case <-ticker.C:
+			requestID++
+			wg.Add(1)
+
+			go func(reqID int) {
+				defer wg.Done()
+
+				// Send request
+				reqResult := sendSingleRequest(ctx, reqID, localPort, false)
+
+				// Update results
+				mu.Lock()
+				result.Results = append(result.Results, reqResult)
+				result.TotalReqs++
+				if reqResult.Success {
+					result.SuccessCount++
+				} else {
+					result.FailureCount++
+				}
+				mu.Unlock()
+			}(requestID)
+		}
+	}
+}
+
+func calculateStageStats(result ProgressiveStageResult) ProgressiveStageResult {
+	if result.TotalReqs == 0 {
+		return result
+	}
+
+	// Calculate success rate
+	result.SuccessRate = float64(result.SuccessCount) / float64(result.TotalReqs) * 100
+
+	// Calculate duration statistics
+	var totalDuration time.Duration
+	for _, r := range result.Results {
+		totalDuration += r.Duration
+		if r.Duration < result.MinDuration {
+			result.MinDuration = r.Duration
+		}
+		if r.Duration > result.MaxDuration {
+			result.MaxDuration = r.Duration
+		}
+	}
+
+	if len(result.Results) > 0 {
+		result.AvgDuration = totalDuration / time.Duration(len(result.Results))
+	}
+
+	// Reset MinDuration if it wasn't updated
+	if result.MinDuration == time.Hour {
+		result.MinDuration = 0
+	}
+
+	return result
+}
+
+func printProgressiveResults(stageResults []ProgressiveStageResult) {
+	separator := strings.Repeat("=", 100)
+	fmt.Println("\n" + separator)
+	fmt.Println("Progressive Stress Test Results")
+	fmt.Println(separator)
+
+	// Print header
+	fmt.Printf("%-10s %-15s %-15s %-15s %-15s %-15s %-15s\n",
+		"QPS", "Total Reqs", "Successful", "Failed", "Success Rate", "Avg Duration", "Max Duration")
+	fmt.Println(strings.Repeat("-", 100))
+
+	// Print each stage
+	for _, stage := range stageResults {
+		fmt.Printf("%-10d %-15d %-15d %-15d %-15s %-15v %-15v\n",
+			stage.QPS,
+			stage.TotalReqs,
+			stage.SuccessCount,
+			stage.FailureCount,
+			fmt.Sprintf("%.2f%%", stage.SuccessRate),
+			stage.AvgDuration.Round(time.Millisecond),
+			stage.MaxDuration.Round(time.Millisecond))
+	}
+
+	fmt.Println(separator)
+
+	// Print summary statistics
+	fmt.Println("\nSummary:")
+	totalRequests := 0
+	totalSuccess := 0
+	for _, stage := range stageResults {
+		totalRequests += stage.TotalReqs
+		totalSuccess += stage.SuccessCount
+	}
+	overallSuccessRate := float64(totalSuccess) / float64(totalRequests) * 100
+	fmt.Printf("  Overall: %d/%d successful (%.2f%% success rate)\n",
+		totalSuccess, totalRequests, overallSuccessRate)
+
+	// Show failures for each stage
+	fmt.Println("\nFailures by Stage:")
+	for _, stage := range stageResults {
+		if stage.FailureCount > 0 {
+			fmt.Printf("  %d QPS: %d failures\n", stage.QPS, stage.FailureCount)
+			// Show first 3 failures for this stage
+			failureCount := 0
+			for _, result := range stage.Results {
+				if !result.Success && failureCount < 3 {
+					failureCount++
+					fmt.Printf("    Request #%d: %s (duration: %v)\n",
+						result.RequestID, result.ErrorMessage, result.Duration)
+				}
+				if failureCount >= 3 {
+					break
+				}
+			}
+		} else {
+			fmt.Printf("  %d QPS: No failures! 🎉\n", stage.QPS)
+		}
+	}
+	fmt.Println()
+}
@@ -101,13 +101,13 @@ func sendSingleRequest(ctx context.Context, requestID int, localPort string, ver
 
 	start := time.Now()
 
-	// Prepare request body
+	// Prepare request body with random content
 	requestBody := map[string]interface{}{
 		"model": "MoM",
 		"messages": []map[string]string{
 			{
 				"role":    "user",
-				"content": fmt.Sprintf("Request #%d: What is 2+2?", requestID),
+				"content": generateRandomContent(requestID),
 			},
 		},
 	}
 
@@ -3,6 +3,7 @@ package testcases
 import (
 	"context"
 	"fmt"
+	"math/rand"
 	"strings"
 	"time"
 
@@ -54,3 +55,92 @@ func setupServiceConnection(ctx context.Context, client *kubernetes.Clientset, o
 
 	return portParts[0], stopFunc, nil
 }
+
+// Random content generation for stress tests
+
var (
	// questionTemplates are arithmetic prompts. Each one contains one or two
	// %d verbs that are filled with random operands.
	questionTemplates = []string{
		"What is %d + %d?",
		"Calculate %d * %d",
		"What is the result of %d - %d?",
		"Solve: %d divided by %d",
		"What is %d%% of %d?",
		"If I have %d apples and buy %d more, how many do I have?",
		"What is the square root of %d?",
		"What is %d to the power of %d?",
		"How many days are in %d weeks?",
		"What is the average of %d and %d?",
	}

	// topics are general-knowledge prompts used verbatim.
	topics = []string{
		"Explain the concept of machine learning",
		"What is the capital of France?",
		"How does photosynthesis work?",
		"What are the benefits of exercise?",
		"Describe the water cycle",
		"What is quantum computing?",
		"How do vaccines work?",
		"What causes climate change?",
		"Explain blockchain technology",
		"What is artificial intelligence?",
		"How does the internet work?",
		"What is the theory of relativity?",
		"Describe the solar system",
		"What is DNA?",
		"How do airplanes fly?",
	}

	// tasks are short instruction-style prompts used verbatim.
	tasks = []string{
		"Write a short poem about nature",
		"Summarize the main points of renewable energy",
		"List 5 programming languages",
		"Describe a typical day in your life",
		"Explain how to make a sandwich",
		"Give me 3 tips for better sleep",
		"What are the colors of the rainbow?",
		"Name 5 countries in Europe",
		"Describe your favorite hobby",
		"What are the main food groups?",
	}
)

// generateRandomContent builds a randomized prompt for the request with the
// given requestID. The result always starts with "Request #<requestID>: " and
// continues with an arithmetic question, a general topic or a short task, each
// family being equally likely.
//
// The generator is seeded from the current time plus requestID, so the output
// differs between calls and between runs; it is not reproducible.
func generateRandomContent(requestID int) string {
	r := rand.New(rand.NewSource(time.Now().UnixNano() + int64(requestID)))

	var prompt string
	switch r.Intn(3) {
	case 0:
		// Math question.
		template := questionTemplates[r.Intn(len(questionTemplates))]
		num1 := r.Intn(100) + 1
		num2 := r.Intn(100) + 1
		operands := []interface{}{num1, num2}

		// Pass exactly as many operands as the template has %d verbs;
		// surplus arguments would make fmt append "%!(EXTRA int=...)" to
		// the prompt.
		verbs := strings.Count(template, "%d")
		if verbs > len(operands) {
			verbs = len(operands)
		}
		prompt = fmt.Sprintf(template, operands[:verbs]...)
	case 1:
		// General topic.
		prompt = topics[r.Intn(len(topics))]
	default:
		// Task.
		prompt = tasks[r.Intn(len(tasks))]
	}

	return fmt.Sprintf("Request #%d: %s", requestID, prompt)
}
+
// formatResponseHeaders renders HTTP response headers for logging, emitting
// one "  <key>: <value>" line per header value. A nil or empty map yields the
// placeholder "  (no headers)".
//
// Keys are visited in map iteration order, so the order of the lines is not
// stable between calls.
func formatResponseHeaders(headers map[string][]string) string {
	if len(headers) == 0 {
		return "  (no headers)"
	}

	var sb strings.Builder
	for key, values := range headers {
		for _, value := range values {
			// Fprintf writes straight into the builder and avoids the
			// temporary string that WriteString(Sprintf(...)) allocates.
			fmt.Fprintf(&sb, "  %s: %s\n", key, value)
		}
	}
	return sb.String()
}
Original file line number	Diff line number	Diff line change
`@@ -101,13 +101,13 @@ func sendSingleRequest(ctx context.Context, requestID int, localPort string, ver`
`101`	`101`
`102`	`102`	`start := time.Now()`
`103`	`103`
`104`		`- // Prepare request body`
	`104`	`+ // Prepare request body with random content`
`105`	`105`	`requestBody := map[string]interface{}{`
`106`	`106`	`"model": "MoM",`
`107`	`107`	`"messages": []map[string]string{`
`108`	`108`	`{`
`109`	`109`	`"role": "user",`
`110`		`- "content": fmt.Sprintf("Request #%d: What is 2+2?", requestID),`
	`110`	`+ "content": generateRandomContent(requestID),`
`111`	`111`	`},`
`112`	`112`	`},`
`113`	`113`	`}`