Merge branch 'main' into fix/go_mod_path

rootfs · web-flow · commit 523d3e7ae264 · 2025-09-06T10:36:23.000-04:00
diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml
diff --git a/src/semantic-router/pkg/extproc/reason_mode_selector.go b/src/semantic-router/pkg/extproc/reason_mode_selector.go
@@ -47,13 +47,29 @@ func (r *OpenAIRouter) getReasoningModeAndCategory(query string) (bool, string)
 	return false, categoryName
 }
 
+// hasDeepSeekAlias reports whether the (already lowercased) model name uses a short alias prefix for DeepSeek (e.g., "ds-*").
+// Rules:
+//   - Accept only when the model string starts with: "ds-", "ds_", "ds:", "ds " or exactly equals "ds"
+//   - Do NOT match occurrences of "ds" in the middle of the model name (e.g., "foo-ds-1b")
+func hasDeepSeekAlias(lower string) bool {
+	lower = strings.TrimSpace(lower)
+	if strings.HasPrefix(lower, "ds") {
+		if len(lower) == 2 { // exactly "ds"
+			return true
+		}
+		sep := lower[2]
+		return sep == '-' || sep == '_' || sep == ':' || sep == ' '
+	}
+	return false
+}
+
 // getModelFamilyAndTemplateParam returns a normalized model family name and the template param to be used (if any)
 func getModelFamilyAndTemplateParam(model string) (string, string) {
 	lower := strings.ToLower(strings.TrimSpace(model))
 	if strings.Contains(lower, "qwen3") {
 		return "qwen3", "enable_thinking"
 	}
-	if strings.Contains(lower, "deepseek") || strings.Contains(lower, "ds") {
+	if strings.Contains(lower, "deepseek") || hasDeepSeekAlias(lower) {
 		return "deepseek", "thinking"
 	}
 	// GPT-OSS family and generic GPT fall back to using reasoning_effort (OpenAI-compatible field)
diff --git a/src/semantic-router/pkg/extproc/reason_mode_selector_test.go b/src/semantic-router/pkg/extproc/reason_mode_selector_test.go
@@ -23,11 +23,17 @@ func TestGetModelFamilyAndTemplateParam(t *testing.T) {
 			expectedParam:  "thinking",
 		},
 		{
-			name:           "DeepSeek alias ds",
+			name:           "DeepSeek alias ds (prefix)",
 			model:          "DS-1.5B",
 			expectedFamily: "deepseek",
 			expectedParam:  "thinking",
 		},
+		{
+			name:           "Non-leading ds should not match DeepSeek",
+			model:          "mistral-ds-1b",
+			expectedFamily: "unknown",
+			expectedParam:  "",
+		},
 		{
 			name:           "GPT-OSS family",
 			model:          "gpt-oss-20b",
diff --git a/src/semantic-router/pkg/utils/classification/classifier.go b/src/semantic-router/pkg/utils/classification/classifier.go
@@ -3,6 +3,7 @@ package classification
 import (
 	"fmt"
 	"log"
+	"slices"
 	"strings"
 	"sync"
 	"time"
@@ -466,35 +467,24 @@ func (c *Classifier) SelectBestModelForCategory(categoryName string) string {
 	bestQuality := 0.0
 
 	if c.Config.Classifier.LoadAware {
-		// Load-aware: combine accuracy and TTFT
-		for _, modelScore := range cat.ModelScores {
+		c.forEachModelScore(cat, func(modelScore config.ModelScore) {
 			quality := modelScore.Score
 			model := modelScore.Model
-
 			baseTTFT := c.ModelTTFT[model]
 			load := c.ModelLoad[model]
 			estTTFT := baseTTFT * (1 + float64(load))
 			if estTTFT == 0 {
-				estTTFT = 1 // avoid div by zero
+				estTTFT = 1
 			}
 			score := quality / estTTFT
-			if score > bestScore {
-				bestScore = score
-				bestModel = model
-				bestQuality = quality
-			}
-		}
+			c.updateBestModel(score, quality, model, &bestScore, &bestQuality, &bestModel)
+		})
 	} else {
-		// Not load-aware: pick the model with the highest accuracy only
-		for _, modelScore := range cat.ModelScores {
+		c.forEachModelScore(cat, func(modelScore config.ModelScore) {
 			quality := modelScore.Score
 			model := modelScore.Model
-			if quality > bestScore {
-				bestScore = quality
-				bestModel = model
-				bestQuality = quality
-			}
-		}
+			c.updateBestModel(quality, quality, model, &bestScore, &bestQuality, &bestModel)
+		})
 	}
 
 	if bestModel == "" {
@@ -507,6 +497,13 @@ func (c *Classifier) SelectBestModelForCategory(categoryName string) string {
 	return bestModel
 }
 
+// forEachModelScore iterates over the category's ModelScores in order and invokes fn for each entry.
+func (c *Classifier) forEachModelScore(cat *config.Category, fn func(modelScore config.ModelScore)) {
+	for _, modelScore := range cat.ModelScores {
+		fn(modelScore)
+	}
+}
+
 // SelectBestModelFromList selects the best model from a list of candidate models for a given category
 func (c *Classifier) SelectBestModelFromList(candidateModels []string, categoryName string) string {
 	if len(candidateModels) == 0 {
@@ -534,49 +531,28 @@ func (c *Classifier) SelectBestModelFromList(candidateModels []string, categoryN
 	bestScore := -1.0
 	bestQuality := 0.0
 
-	if c.Config.Classifier.LoadAware {
-		// Load-aware: combine accuracy and TTFT
-		for _, modelScore := range cat.ModelScores {
-			model := modelScore.Model
-
-			// Check if this model is in the candidate list
-			if !c.contains(candidateModels, model) {
-				continue
-			}
-
-			quality := modelScore.Score
+	filteredFn := func(modelScore config.ModelScore) {
+		model := modelScore.Model
+		if !slices.Contains(candidateModels, model) {
+			return
+		}
+		quality := modelScore.Score
+		if c.Config.Classifier.LoadAware {
 			baseTTFT := c.ModelTTFT[model]
 			load := c.ModelLoad[model]
 			estTTFT := baseTTFT * (1 + float64(load))
 			if estTTFT == 0 {
 				estTTFT = 1 // avoid div by zero
 			}
 			score := quality / estTTFT
-			if score > bestScore {
-				bestScore = score
-				bestModel = model
-				bestQuality = quality
-			}
-		}
-	} else {
-		// Not load-aware: pick the model with the highest accuracy only
-		for _, modelScore := range cat.ModelScores {
-			model := modelScore.Model
-
-			// Check if this model is in the candidate list
-			if !c.contains(candidateModels, model) {
-				continue
-			}
-
-			quality := modelScore.Score
-			if quality > bestScore {
-				bestScore = quality
-				bestModel = model
-				bestQuality = quality
-			}
+			c.updateBestModel(score, quality, model, &bestScore, &bestQuality, &bestModel)
+		} else {
+			c.updateBestModel(quality, quality, model, &bestScore, &bestQuality, &bestModel)
 		}
 	}
 
+	c.forEachModelScore(cat, filteredFn)
+
 	if bestModel == "" {
 		log.Printf("No suitable model found from candidates for category %s, using first candidate", categoryName)
 		return candidateModels[0]
@@ -619,12 +595,11 @@ func (c *Classifier) DecrementModelLoad(model string) {
 	}
 }
 
-// contains checks if a slice contains a string
-func (c *Classifier) contains(slice []string, item string) bool {
-	for _, s := range slice {
-		if s == item {
-			return true
-		}
+// updateBestModel updates the best model, score, and quality if the new score is better.
+func (c *Classifier) updateBestModel(score, quality float64, model string, bestScore *float64, bestQuality *float64, bestModel *string) {
+	if score > *bestScore {
+		*bestScore = score
+		*bestModel = model
+		*bestQuality = quality
 	}
-	return false
 }
diff --git a/src/semantic-router/pkg/utils/classification/classifier_test.go b/src/semantic-router/pkg/utils/classification/classifier_test.go
@@ -406,3 +406,49 @@ var _ = Describe("PIIClassification", func() {
 		})
 	})
 })
+
+func TestUpdateBestModel(t *testing.T) {
+
+	classifier := &Classifier{}
+
+	bestScore := 0.5
+	bestQuality := 0.5
+	bestModel := "old-model"
+
+	classifier.updateBestModel(0.8, 0.9, "new-model", &bestScore, &bestQuality, &bestModel)
+	if bestScore != 0.8 || bestQuality != 0.9 || bestModel != "new-model" {
+		t.Errorf("update: got bestScore=%v, bestQuality=%v, bestModel=%v", bestScore, bestQuality, bestModel)
+	}
+
+	classifier.updateBestModel(0.7, 0.7, "another-model", &bestScore, &bestQuality, &bestModel)
+	if bestScore != 0.8 || bestQuality != 0.9 || bestModel != "new-model" {
+		t.Errorf("not update: got bestScore=%v, bestQuality=%v, bestModel=%v", bestScore, bestQuality, bestModel)
+	}
+}
+
+func TestForEachModelScore(t *testing.T) {
+
+	c := &Classifier{}
+	cat := &config.Category{
+		ModelScores: []config.ModelScore{
+			{Model: "model-a", Score: 0.9},
+			{Model: "model-b", Score: 0.8},
+			{Model: "model-c", Score: 0.7},
+		},
+	}
+
+	var models []string
+	c.forEachModelScore(cat, func(ms config.ModelScore) {
+		models = append(models, ms.Model)
+	})
+
+	expected := []string{"model-a", "model-b", "model-c"}
+	if len(models) != len(expected) {
+		t.Fatalf("expected %d models, got %d", len(expected), len(models))
+	}
+	for i, m := range expected {
+		if models[i] != m {
+			t.Errorf("expected model %s at index %d, got %s", m, i, models[i])
+		}
+	}
+}
diff --git a/src/semantic-router/pkg/utils/ttft/calculator_test.go b/src/semantic-router/pkg/utils/ttft/calculator_test.go
@@ -0,0 +1,56 @@
+package ttft
+
+import (
+	"testing"
+
+	"github.com/vllm-project/semantic-router/semantic-router/pkg/config"
+)
+
+func TestComputeBaseTTFT(t *testing.T) {
+
+	gpuConfig := config.GPUConfig{
+		FLOPS: 1e12, // 1 TFLOP
+		HBM:   1e11, // 100 GB/s
+	}
+	calculator := NewCalculator(gpuConfig)
+
+	routerCfg := &config.RouterConfig{}
+	// An empty RouterConfig is used on purpose; this presumably makes GetModelParamCount,
+	// GetModelBatchSize and GetModelContextSize fall back to their defaults (TODO confirm).
+
+	ttft := calculator.ComputeBaseTTFT("test-model", routerCfg)
+	if ttft <= 0 {
+		t.Errorf("Expected TTFT > 0, got %f", ttft)
+	}
+}
+
+func TestInitializeModelTTFT(t *testing.T) {
+	gpuConfig := config.GPUConfig{
+		FLOPS: 1e12,
+		HBM:   1e11,
+	}
+	calculator := NewCalculator(gpuConfig)
+
+	// Minimal mock config with two categories and models
+	routerCfg := &config.RouterConfig{
+		Categories: []config.Category{
+			{
+				ModelScores: []config.ModelScore{
+					{Model: "model-a", Score: 0.9},
+					{Model: "model-b", Score: 0.8},
+				},
+			},
+		},
+		DefaultModel: "model-default",
+	}
+
+	modelTTFT := calculator.InitializeModelTTFT(routerCfg)
+	if len(modelTTFT) != 3 {
+		t.Errorf("Expected 3 models in TTFT map, got %d", len(modelTTFT))
+	}
+	for model, ttft := range modelTTFT {
+		if ttft <= 0 {
+			t.Errorf("Model %s has non-positive TTFT: %f", model, ttft)
+		}
+	}
+}
diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
@@ -112,6 +112,17 @@ const config = {
               },
             ],
           },
+          {
+            type: 'dropdown',
+            label: 'Roadmap',
+            position: 'left',
+            items: [
+              {
+                label: 'v0.1',
+                to: '/roadmap/v0.1',
+              },
+            ],
+          },
           {
             href: 'https://github.com/vllm-project/semantic-router',
             label: 'GitHub',
diff --git a/website/src/pages/roadmap/roadmap.module.css b/website/src/pages/roadmap/roadmap.module.css
diff --git a/website/src/pages/roadmap/v0.1.js b/website/src/pages/roadmap/v0.1.js