feat: optimize classifier and add ttft unit test

yuluo-yx · yuluo-yx · commit 6f9a8f0d4660 · 2025-09-06T16:54:34.000+08:00
Signed-off-by: yuluo-yx <yuluo08290126@gmail.com>
diff --git a/src/semantic-router/pkg/utils/classification/classifier.go b/src/semantic-router/pkg/utils/classification/classifier.go
@@ -3,6 +3,7 @@ package classification
 import (
 	"fmt"
 	"log"
+	"slices"
 	"strings"
 	"sync"
 	"time"
@@ -466,35 +467,24 @@ func (c *Classifier) SelectBestModelForCategory(categoryName string) string {
 	bestQuality := 0.0
 
 	if c.Config.Classifier.LoadAware {
-		// Load-aware: combine accuracy and TTFT
-		for _, modelScore := range cat.ModelScores {
+		c.forEachModelScore(cat, func(modelScore config.ModelScore) {
 			quality := modelScore.Score
 			model := modelScore.Model
-
 			baseTTFT := c.ModelTTFT[model]
 			load := c.ModelLoad[model]
 			estTTFT := baseTTFT * (1 + float64(load))
 			if estTTFT == 0 {
-				estTTFT = 1 // avoid div by zero
+				estTTFT = 1
 			}
 			score := quality / estTTFT
-			if score > bestScore {
-				bestScore = score
-				bestModel = model
-				bestQuality = quality
-			}
-		}
+			c.updateBestModel(score, quality, model, &bestScore, &bestQuality, &bestModel)
+		})
 	} else {
-		// Not load-aware: pick the model with the highest accuracy only
-		for _, modelScore := range cat.ModelScores {
+		c.forEachModelScore(cat, func(modelScore config.ModelScore) {
 			quality := modelScore.Score
 			model := modelScore.Model
-			if quality > bestScore {
-				bestScore = quality
-				bestModel = model
-				bestQuality = quality
-			}
-		}
+			c.updateBestModel(quality, quality, model, &bestScore, &bestQuality, &bestModel)
+		})
 	}
 
 	if bestModel == "" {
@@ -507,6 +497,13 @@ func (c *Classifier) SelectBestModelForCategory(categoryName string) string {
 	return bestModel
 }
 
+// forEachModelScore iterates over the category's ModelScores and invokes fn on each entry.
+func (c *Classifier) forEachModelScore(cat *config.Category, fn func(modelScore config.ModelScore)) {
+	for _, modelScore := range cat.ModelScores {
+		fn(modelScore)
+	}
+}
+
 // SelectBestModelFromList selects the best model from a list of candidate models for a given category
 func (c *Classifier) SelectBestModelFromList(candidateModels []string, categoryName string) string {
 	if len(candidateModels) == 0 {
@@ -534,49 +531,28 @@ func (c *Classifier) SelectBestModelFromList(candidateModels []string, categoryN
 	bestScore := -1.0
 	bestQuality := 0.0
 
-	if c.Config.Classifier.LoadAware {
-		// Load-aware: combine accuracy and TTFT
-		for _, modelScore := range cat.ModelScores {
-			model := modelScore.Model
-
-			// Check if this model is in the candidate list
-			if !c.contains(candidateModels, model) {
-				continue
-			}
-
-			quality := modelScore.Score
+	filteredFn := func(modelScore config.ModelScore) {
+		model := modelScore.Model
+		if !slices.Contains(candidateModels, model) {
+			return
+		}
+		quality := modelScore.Score
+		if c.Config.Classifier.LoadAware {
 			baseTTFT := c.ModelTTFT[model]
 			load := c.ModelLoad[model]
 			estTTFT := baseTTFT * (1 + float64(load))
 			if estTTFT == 0 {
 				estTTFT = 1 // avoid div by zero
 			}
 			score := quality / estTTFT
-			if score > bestScore {
-				bestScore = score
-				bestModel = model
-				bestQuality = quality
-			}
-		}
-	} else {
-		// Not load-aware: pick the model with the highest accuracy only
-		for _, modelScore := range cat.ModelScores {
-			model := modelScore.Model
-
-			// Check if this model is in the candidate list
-			if !c.contains(candidateModels, model) {
-				continue
-			}
-
-			quality := modelScore.Score
-			if quality > bestScore {
-				bestScore = quality
-				bestModel = model
-				bestQuality = quality
-			}
+			c.updateBestModel(score, quality, model, &bestScore, &bestQuality, &bestModel)
+		} else {
+			c.updateBestModel(quality, quality, model, &bestScore, &bestQuality, &bestModel)
 		}
 	}
 
+	c.forEachModelScore(cat, filteredFn)
+
 	if bestModel == "" {
 		log.Printf("No suitable model found from candidates for category %s, using first candidate", categoryName)
 		return candidateModels[0]
@@ -619,12 +595,11 @@ func (c *Classifier) DecrementModelLoad(model string) {
 	}
 }
 
-// contains checks if a slice contains a string
-func (c *Classifier) contains(slice []string, item string) bool {
-	for _, s := range slice {
-		if s == item {
-			return true
-		}
+// updateBestModel overwrites *bestScore, *bestModel, and *bestQuality when score is strictly greater than *bestScore.
+func (c *Classifier) updateBestModel(score, quality float64, model string, bestScore *float64, bestQuality *float64, bestModel *string) {
+	if score > *bestScore {
+		*bestScore = score
+		*bestModel = model
+		*bestQuality = quality
 	}
-	return false
 }
diff --git a/src/semantic-router/pkg/utils/ttft/calculator_test.go b/src/semantic-router/pkg/utils/ttft/calculator_test.go
@@ -0,0 +1,56 @@
+package ttft
+
+import (
+	"testing"
+
+	"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
+)
+
+func TestComputeBaseTTFT(t *testing.T) {
+
+	gpuConfig := config.GPUConfig{
+		FLOPS: 1e12, // 1 TFLOP
+		HBM:   1e11, // 100 GB/s
+	}
+	calculator := NewCalculator(gpuConfig)
+
+	routerCfg := &config.RouterConfig{}
+	// routerCfg is left empty, so this test relies on GetModelParamCount, GetModelBatchSize,
+	// and GetModelContextSize presumably returning defaults for "test-model" (TODO confirm).
+
+	ttft := calculator.ComputeBaseTTFT("test-model", routerCfg)
+	if ttft <= 0 {
+		t.Errorf("Expected TTFT > 0, got %f", ttft)
+	}
+}
+
+func TestInitializeModelTTFT(t *testing.T) {
+	gpuConfig := config.GPUConfig{
+		FLOPS: 1e12,
+		HBM:   1e11,
+	}
+	calculator := NewCalculator(gpuConfig)
+
+	// Minimal config: one category with two scored models, plus a separate default model.
+	routerCfg := &config.RouterConfig{
+		Categories: []config.Category{
+			{
+				ModelScores: []config.ModelScore{
+					{Model: "model-a", Score: 0.9},
+					{Model: "model-b", Score: 0.8},
+				},
+			},
+		},
+		DefaultModel: "model-default",
+	}
+
+	modelTTFT := calculator.InitializeModelTTFT(routerCfg)
+	if len(modelTTFT) != 3 {
+		t.Errorf("Expected 3 models in TTFT map, got %d", len(modelTTFT))
+	}
+	for model, ttft := range modelTTFT {
+		if ttft <= 0 {
+			t.Errorf("Model %s has non-positive TTFT: %f", model, ttft)
+		}
+	}
+}