|
| 1 | +package testcases |
| 2 | + |
| 3 | +import ( |
| 4 | + "bytes" |
| 5 | + "context" |
| 6 | + "encoding/json" |
| 7 | + "fmt" |
| 8 | + "io" |
| 9 | + "net/http" |
| 10 | + "os" |
| 11 | + "time" |
| 12 | + |
| 13 | + pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases" |
| 14 | + "k8s.io/client-go/kubernetes" |
| 15 | +) |
| 16 | + |
| 17 | +func init() { |
| 18 | + pkgtestcases.Register("keyword-routing", pkgtestcases.TestCase{ |
| 19 | + Description: "Test keyword routing accuracy and verify routing decisions", |
| 20 | + Tags: []string{"ai-gateway", "keyword-routing", "classification"}, |
| 21 | + Fn: testKeywordRouting, |
| 22 | + }) |
| 23 | +} |
| 24 | + |
// KeywordRoutingCase is one entry of the keyword routing test data, decoded
// from JSON using the field tags below.
type KeywordRoutingCase struct {
	// Name labels the case in verbose progress output.
	Name string `json:"name"`
	// Description is free-form text about the case.
	Description string `json:"description"`
	// Query is the user message sent to the router for classification.
	Query string `json:"query"`
	// ExpectedCategory is the category the router must report for the case
	// to count as correct.
	ExpectedCategory string `json:"expected_category"`
	// ExpectedConfidence is decoded from the test data.
	// NOTE(review): not compared against anything in the visible code.
	ExpectedConfidence float64 `json:"expected_confidence"`
	// MatchedKeywords is decoded from the test data.
	// NOTE(review): not compared against anything in the visible code.
	MatchedKeywords []string `json:"matched_keywords"`
}
| 34 | + |
// KeywordRoutingResult records the outcome of classifying a single query.
type KeywordRoutingResult struct {
	// Query is the text that was sent to the router.
	Query string
	// ExpectedCategory is the category the test data asked for.
	ExpectedCategory string
	// ActualCategory is the category the router reported; it stays empty
	// when the request failed.
	ActualCategory string
	// Correct is true when ActualCategory equals ExpectedCategory.
	Correct bool
	// Error holds the request error text, or "" when the request succeeded.
	Error string
}
| 43 | + |
| 44 | +func testKeywordRouting(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error { |
| 45 | + if opts.Verbose { |
| 46 | + fmt.Println("[Test] Testing keyword routing accuracy") |
| 47 | + } |
| 48 | + |
| 49 | + // Setup service connection and get local port |
| 50 | + localPort, stopPortForward, err := setupServiceConnection(ctx, client, opts) |
| 51 | + if err != nil { |
| 52 | + return err |
| 53 | + } |
| 54 | + defer stopPortForward() |
| 55 | + |
| 56 | + // Load test cases from JSON file |
| 57 | + testCases, err := loadKeywordRoutingCases("testdata/keyword_routing_cases.json") |
| 58 | + if err != nil { |
| 59 | + return fmt.Errorf("failed to load test cases: %w", err) |
| 60 | + } |
| 61 | + |
| 62 | + if opts.Verbose { |
| 63 | + fmt.Printf("[Test] Loaded %d keyword routing test cases\n", len(testCases)) |
| 64 | + } |
| 65 | + |
| 66 | + // Test each case |
| 67 | + results := make([]KeywordRoutingResult, 0, len(testCases)) |
| 68 | + successCount := 0 |
| 69 | + |
| 70 | + for i, tc := range testCases { |
| 71 | + if opts.Verbose { |
| 72 | + fmt.Printf("[Test] %d/%d: Testing %s\n", i+1, len(testCases), tc.Name) |
| 73 | + } |
| 74 | + |
| 75 | + result := KeywordRoutingResult{ |
| 76 | + Query: tc.Query, |
| 77 | + ExpectedCategory: tc.ExpectedCategory, |
| 78 | + } |
| 79 | + |
| 80 | + // Make classification request |
| 81 | + category, err := classifyKeywordQuery(ctx, localPort, tc.Query) |
| 82 | + if err != nil { |
| 83 | + result.Error = err.Error() |
| 84 | + results = append(results, result) |
| 85 | + continue |
| 86 | + } |
| 87 | + |
| 88 | + result.ActualCategory = category |
| 89 | + result.Correct = (category == tc.ExpectedCategory) |
| 90 | + |
| 91 | + if result.Correct { |
| 92 | + successCount++ |
| 93 | + } |
| 94 | + |
| 95 | + results = append(results, result) |
| 96 | + |
| 97 | + if opts.Verbose { |
| 98 | + if result.Correct { |
| 99 | + fmt.Printf(" ✓ PASS: Expected '%s', got '%s'\n", tc.ExpectedCategory, category) |
| 100 | + } else { |
| 101 | + fmt.Printf(" ✗ FAIL: Expected '%s', got '%s'\n", tc.ExpectedCategory, category) |
| 102 | + } |
| 103 | + } |
| 104 | + } |
| 105 | + |
| 106 | + // Print summary |
| 107 | + accuracy := float64(successCount) / float64(len(testCases)) * 100 |
| 108 | + fmt.Printf("\n=== Keyword Routing Test Summary ===\n") |
| 109 | + fmt.Printf("Total tests: %d\n", len(testCases)) |
| 110 | + fmt.Printf("Passed: %d\n", successCount) |
| 111 | + fmt.Printf("Failed: %d\n", len(testCases)-successCount) |
| 112 | + fmt.Printf("Accuracy: %.2f%%\n", accuracy) |
| 113 | + |
| 114 | + // Print failures if any |
| 115 | + if successCount < len(testCases) { |
| 116 | + fmt.Println("\n=== Failures ===") |
| 117 | + for _, result := range results { |
| 118 | + if !result.Correct { |
| 119 | + fmt.Printf("Query: %s\n", result.Query) |
| 120 | + fmt.Printf(" Expected: %s\n", result.ExpectedCategory) |
| 121 | + fmt.Printf(" Got: %s\n", result.ActualCategory) |
| 122 | + if result.Error != "" { |
| 123 | + fmt.Printf(" Error: %s\n", result.Error) |
| 124 | + } |
| 125 | + } |
| 126 | + } |
| 127 | + } |
| 128 | + |
| 129 | + // Require at least 80% accuracy |
| 130 | + if accuracy < 80.0 { |
| 131 | + return fmt.Errorf("keyword routing accuracy %.2f%% is below threshold of 80%%", accuracy) |
| 132 | + } |
| 133 | + |
| 134 | + return nil |
| 135 | +} |
| 136 | + |
// classifyKeywordQuery posts query as a single user message to the
// /v1/chat/completions endpoint on localhost:localPort and returns the value
// of the X-VSR-Category response header.
//
// The category is taken from the response headers alone, so it is returned
// even if the response body turns out to be truncated or unreadable. When the
// header is absent, a non-200 status is reported as an error (including
// whatever part of the body could be read), a body read failure is reported as
// an error, and otherwise an empty category with a nil error means "no match".
// Errors are also returned when the request cannot be encoded, built, or sent.
func classifyKeywordQuery(ctx context.Context, localPort string, query string) (string, error) {
	// maxBodyBytes caps how much of the response body is buffered; the body is
	// only ever used to enrich an error message.
	const maxBodyBytes = 1 << 20

	// Create HTTP request payload
	payload := map[string]interface{}{
		"model": "gpt-3.5-turbo",
		"messages": []map[string]string{
			{
				"role":    "user",
				"content": query,
			},
		},
	}

	jsonData, err := json.Marshal(payload)
	if err != nil {
		return "", fmt.Errorf("failed to marshal request: %w", err)
	}

	// Make HTTP request
	endpoint := fmt.Sprintf("http://localhost:%s/v1/chat/completions", localPort)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(jsonData))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	// Read the body before deciding, but keep the error aside: it only
	// matters when the headers did not already answer the question.
	body, readErr := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes))

	// Extract classification from response headers
	if category := resp.Header.Get("X-VSR-Category"); category != "" {
		return category, nil
	}

	// No category header: distinguish an error response from "no match".
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("request failed with status %d: %s", resp.StatusCode, string(body))
	}
	if readErr != nil {
		return "", fmt.Errorf("failed to read response: %w", readErr)
	}

	// No category means no match (empty category)
	return "", nil
}
| 189 | + |
| 190 | +// loadKeywordRoutingCases loads test cases from JSON file |
| 191 | +func loadKeywordRoutingCases(filepath string) ([]KeywordRoutingCase, error) { |
| 192 | + data, err := os.ReadFile(filepath) |
| 193 | + if err != nil { |
| 194 | + return nil, fmt.Errorf("failed to read test data file %s: %w", filepath, err) |
| 195 | + } |
| 196 | + |
| 197 | + var cases []KeywordRoutingCase |
| 198 | + if err := json.Unmarshal(data, &cases); err != nil { |
| 199 | + return nil, fmt.Errorf("failed to parse keyword routing test cases: %w", err) |
| 200 | + } |
| 201 | + |
| 202 | + return cases, nil |
| 203 | +} |
0 commit comments