refactor: remove load aware algorithm

aeft · aeft · commit ad93419eb4d5 · 2025-09-09T11:19:47.000-07:00
Signed-off-by: Alex Wang <yesterda9@gmail.com>
diff --git a/config/config.yaml b/config/config.yaml
@@ -90,7 +90,6 @@ classifier:
     threshold: 0.7
     use_cpu: true
     pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
-  load_aware: false
 categories:
 - name: business
   use_reasoning: false
diff --git a/deploy/kubernetes/config.yaml b/deploy/kubernetes/config.yaml
@@ -78,7 +78,6 @@ classifier:
     threshold: 0.7
     use_cpu: true
     pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
-  load_aware: false
 categories:
 - name: business
   model_scores:
diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go
@@ -33,7 +33,6 @@ type RouterConfig struct {
 			UseCPU         bool    `yaml:"use_cpu"`
 			PIIMappingPath string  `yaml:"pii_mapping_path"`
 		} `yaml:"pii_model"`
-		LoadAware bool `yaml:"load_aware"`
 	} `yaml:"classifier"`
 
 	// Categories for routing queries
diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go
@@ -60,7 +60,6 @@ classifier:
     use_cpu: true
     use_modernbert: false
     pii_mapping_path: "/path/to/pii.json"
-  load_aware: true
 
 categories:
   - name: "general"
@@ -138,7 +137,6 @@ tools:
 				// Verify classifier config
 				Expect(cfg.Classifier.CategoryModel.ModelID).To(Equal("test-category-model"))
 				Expect(cfg.Classifier.CategoryModel.UseModernBERT).To(BeTrue())
-				Expect(cfg.Classifier.LoadAware).To(BeTrue())
 
 				// Verify categories
 				Expect(cfg.Categories).To(HaveLen(1))
diff --git a/src/semantic-router/pkg/extproc/request_handler.go b/src/semantic-router/pkg/extproc/request_handler.go
@@ -370,9 +370,6 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 				effortForMetrics := r.getReasoningEffort(categoryName)
 				metrics.RecordReasoningDecision(categoryName, matchedModel, useReasoning, effortForMetrics)
 
-				// Track the model load for the selected model
-				r.Classifier.IncrementModelLoad(matchedModel)
-
 				// Track the model routing change
 				metrics.RecordModelRouting(originalModel, matchedModel)
 
diff --git a/src/semantic-router/pkg/extproc/response_handler.go b/src/semantic-router/pkg/extproc/response_handler.go
@@ -52,7 +52,6 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
 			float64(completionTokens),
 		)
 		metrics.RecordModelCompletionLatency(ctx.RequestModel, completionLatency.Seconds())
-		r.Classifier.DecrementModelLoad(ctx.RequestModel)
 
 		// Compute and record cost if pricing is configured
 		if r.Config != nil {
diff --git a/src/semantic-router/pkg/extproc/router.go b/src/semantic-router/pkg/extproc/router.go
@@ -131,8 +131,7 @@ func NewOpenAIRouter(configPath string) (*OpenAIRouter, error) {
 
 	// Create utility components
 	piiChecker := pii.NewPolicyChecker(cfg, cfg.ModelConfig)
-	modelTTFT := make(map[string]float64) // Empty TTFT map since load balancing is disabled
-	classifier := classification.NewClassifier(cfg, categoryMapping, piiMapping, jailbreakMapping, modelTTFT)
+	classifier := classification.NewClassifier(cfg, categoryMapping, piiMapping, jailbreakMapping)
 
 	// Create global classification service for API access
 	services.NewClassificationService(classifier, cfg)
diff --git a/src/semantic-router/pkg/extproc/security_test.go b/src/semantic-router/pkg/extproc/security_test.go
@@ -52,7 +52,7 @@ var _ = Describe("Security Checks", func() {
 				},
 			}
 			router.PIIChecker = pii.NewPolicyChecker(cfg, cfg.ModelConfig)
-			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, nil, router.Classifier.ModelTTFT)
+			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, nil)
 		})
 
 		It("should allow requests with no PII", func() {
@@ -97,7 +97,7 @@ var _ = Describe("Security Checks", func() {
 			piiMapping, err := classification.LoadPIIMapping(cfg.Classifier.PIIModel.PIIMappingPath)
 			Expect(err).NotTo(HaveOccurred())
 
-			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil, router.Classifier.ModelTTFT)
+			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil)
 		})
 
 		Describe("ClassifyPII method", func() {
@@ -339,7 +339,7 @@ var _ = Describe("Security Checks", func() {
 			piiMapping, err := classification.LoadPIIMapping(cfg.Classifier.PIIModel.PIIMappingPath)
 			Expect(err).NotTo(HaveOccurred())
 
-			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil, router.Classifier.ModelTTFT)
+			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, piiMapping, nil)
 		})
 
 		Describe("Error handling and edge cases", func() {
@@ -524,7 +524,7 @@ var _ = Describe("Security Checks", func() {
 				IdxToLabel: map[string]string{"0": "benign", "1": "jailbreak"},
 			}
 
-			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, jailbreakMapping, router.Classifier.ModelTTFT)
+			router.Classifier = classification.NewClassifier(cfg, router.Classifier.CategoryMapping, router.Classifier.PIIMapping, jailbreakMapping)
 		})
 
 		It("should process potential jailbreak attempts", func() {
diff --git a/src/semantic-router/pkg/extproc/test_utils_test.go b/src/semantic-router/pkg/extproc/test_utils_test.go
@@ -95,7 +95,6 @@ func CreateTestConfig() *config.RouterConfig {
 				UseCPU         bool    `yaml:"use_cpu"`
 				PIIMappingPath string  `yaml:"pii_mapping_path"`
 			} `yaml:"pii_model"`
-			LoadAware bool `yaml:"load_aware"`
 		}{
 			CategoryModel: struct {
 				ModelID             string  `yaml:"model_id"`
@@ -119,7 +118,6 @@ func CreateTestConfig() *config.RouterConfig {
 				UseCPU:         true,
 				PIIMappingPath: "../../../../models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json",
 			},
-			LoadAware: true,
 		},
 		Categories: []config.Category{
 			{
@@ -220,11 +218,7 @@ func CreateTestRouter(cfg *config.RouterConfig) (*extproc.OpenAIRouter, error) {
 	toolsDatabase := tools.NewToolsDatabase(toolsOptions)
 
 	// Create classifier
-	modelTTFT := map[string]float64{
-		"model-a": 2.5,
-		"model-b": 1.8,
-	}
-	classifier := classification.NewClassifier(cfg, categoryMapping, piiMapping, nil, modelTTFT)
+	classifier := classification.NewClassifier(cfg, categoryMapping, piiMapping, nil)
 
 	// Create PII checker
 	piiChecker := pii.NewPolicyChecker(cfg, cfg.ModelConfig)
diff --git a/src/semantic-router/pkg/utils/classification/classifier.go b/src/semantic-router/pkg/utils/classification/classifier.go
@@ -5,7 +5,6 @@ import (
 	"log"
 	"slices"
 	"strings"
-	"sync"
 	"time"
 
 	candle_binding "github.com/vllm-project/semantic-router/candle-binding"
@@ -148,16 +147,12 @@ type Classifier struct {
 	CategoryMapping  *CategoryMapping
 	PIIMapping       *PIIMapping
 	JailbreakMapping *JailbreakMapping
-	// Model selection fields
-	ModelLoad     map[string]int
-	ModelLoadLock sync.Mutex
-	ModelTTFT     map[string]float64
 	// Jailbreak detection state
 	JailbreakInitialized bool
 }
 
 // NewClassifier creates a new classifier with model selection and jailbreak detection capabilities
-func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, piiMapping *PIIMapping, jailbreakMapping *JailbreakMapping, modelTTFT map[string]float64) *Classifier {
+func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, piiMapping *PIIMapping, jailbreakMapping *JailbreakMapping) *Classifier {
 	return &Classifier{
 		categoryInference:    createCategoryInference(cfg.Classifier.CategoryModel.UseModernBERT),
 		jailbreakInitializer: createJailbreakInitializer(cfg.PromptGuard.UseModernBERT),
@@ -168,8 +163,6 @@ func NewClassifier(cfg *config.RouterConfig, categoryMapping *CategoryMapping, p
 		CategoryMapping:      categoryMapping,
 		PIIMapping:           piiMapping,
 		JailbreakMapping:     jailbreakMapping,
-		ModelLoad:            make(map[string]int),
-		ModelTTFT:            modelTTFT,
 		JailbreakInitialized: false,
 	}
 }
@@ -475,9 +468,6 @@ func (c *Classifier) SelectBestModelForCategory(categoryName string) string {
 		return c.Config.DefaultModel
 	}
 
-	c.ModelLoadLock.Lock()
-	defer c.ModelLoadLock.Unlock()
-
 	bestModel, bestScore, bestQuality := c.selectBestModelInternal(cat, nil)
 
 	if bestModel == "" {
@@ -500,25 +490,6 @@ func (c *Classifier) findCategory(categoryName string) *config.Category {
 	return nil
 }
 
-// calculateModelScore calculates the combined score and quality for a model
-func (c *Classifier) calculateModelScore(modelScore config.ModelScore) (float64, float64) {
-	quality := modelScore.Score
-	model := modelScore.Model
-
-	if !c.Config.Classifier.LoadAware {
-		return quality, quality
-	}
-
-	baseTTFT := c.ModelTTFT[model]
-	load := c.ModelLoad[model]
-	estTTFT := baseTTFT * (1 + float64(load))
-	if estTTFT == 0 {
-		estTTFT = 1 // avoid div by zero
-	}
-	score := quality / estTTFT
-	return score, quality
-}
-
 // selectBestModelInternal performs the core model selection logic
 //
 // modelFilter is optional - if provided, only models passing the filter will be considered
@@ -532,8 +503,7 @@ func (c *Classifier) selectBestModelInternal(cat *config.Category, modelFilter f
 		if modelFilter != nil && !modelFilter(model) {
 			return
 		}
-		score, quality := c.calculateModelScore(modelScore)
-		c.updateBestModel(score, quality, model, &bestScore, &bestQuality, &bestModel)
+		c.updateBestModel(modelScore.Score, modelScore.Score, model, &bestScore, &bestQuality, &bestModel)
 	})
 
 	return bestModel, bestScore, bestQuality
@@ -558,9 +528,6 @@ func (c *Classifier) SelectBestModelFromList(candidateModels []string, categoryN
 		return candidateModels[0]
 	}
 
-	c.ModelLoadLock.Lock()
-	defer c.ModelLoadLock.Unlock()
-
 	bestModel, bestScore, bestQuality := c.selectBestModelInternal(cat,
 		func(model string) bool {
 			return slices.Contains(candidateModels, model)
@@ -592,22 +559,6 @@ func (c *Classifier) GetModelsForCategory(categoryName string) []string {
 	return models
 }
 
-// IncrementModelLoad increments the load counter for a model
-func (c *Classifier) IncrementModelLoad(model string) {
-	c.ModelLoadLock.Lock()
-	defer c.ModelLoadLock.Unlock()
-	c.ModelLoad[model]++
-}
-
-// DecrementModelLoad decrements the load counter for a model
-func (c *Classifier) DecrementModelLoad(model string) {
-	c.ModelLoadLock.Lock()
-	defer c.ModelLoadLock.Unlock()
-	if c.ModelLoad[model] > 0 {
-		c.ModelLoad[model]--
-	}
-}
-
 // updateBestModel updates the best model, score, and quality if the new score is better.
 func (c *Classifier) updateBestModel(score, quality float64, model string, bestScore *float64, bestQuality *float64, bestModel *string) {
 	if score > *bestScore {
diff --git a/src/semantic-router/pkg/utils/classification/classifier_test.go b/src/semantic-router/pkg/utils/classification/classifier_test.go
@@ -145,40 +145,21 @@ var _ = Describe("category classification and model selection", func() {
 			},
 		}
 		classifier.Config.DefaultModel = "default-model"
-		classifier.Config.Classifier.LoadAware = false
 	})
 
 	Describe("select best model for category", func() {
+		It("should return the best model", func() {
+			model := classifier.SelectBestModelForCategory("technology")
+			Expect(model).To(Equal("model-a"))
+		})
+
 		Context("when category is not found", func() {
 			It("should return the default model", func() {
 				model := classifier.SelectBestModelForCategory("non-existent-category")
 				Expect(model).To(Equal("default-model"))
 			})
 		})
 
-		Context("when category is found and the classifier is not load aware", func() {
-			It("should return the best model", func() {
-				model := classifier.SelectBestModelForCategory("technology")
-				Expect(model).To(Equal("model-a"))
-			})
-		})
-
-		Context("when the classifier is load aware", func() {
-			It("should return the best model", func() {
-				classifier.Config.Classifier.LoadAware = true
-				classifier.ModelTTFT = map[string]float64{
-					"model-a": 1,
-					"model-b": 1,
-				}
-				classifier.ModelLoad = map[string]int{
-					"model-a": 10,
-					"model-b": 1,
-				}
-				model := classifier.SelectBestModelForCategory("technology")
-				Expect(model).To(Equal("model-b"))
-			})
-		})
-
 		Context("when no best model is found", func() {
 			It("should return the default model", func() {
 				model := classifier.SelectBestModelForCategory("sports")
@@ -188,6 +169,11 @@ var _ = Describe("category classification and model selection", func() {
 	})
 
 	Describe("select best model from list", func() {
+		It("should return the best model", func() {
+			model := classifier.SelectBestModelFromList([]string{"model-a"}, "technology")
+			Expect(model).To(Equal("model-a"))
+		})
+
 		Context("when candidate models are empty", func() {
 			It("should return the default model", func() {
 				model := classifier.SelectBestModelFromList([]string{}, "technology")
@@ -202,13 +188,6 @@ var _ = Describe("category classification and model selection", func() {
 			})
 		})
 
-		Context("when category is found and the classifier is not load aware", func() {
-			It("should return the best model", func() {
-				model := classifier.SelectBestModelFromList([]string{"model-a"}, "technology")
-				Expect(model).To(Equal("model-a"))
-			})
-		})
-
 		Context("when the model is not in the candidate models", func() {
 			It("should return the first candidate model", func() {
 				model := classifier.SelectBestModelFromList([]string{"model-c"}, "technology")
@@ -274,57 +253,7 @@ var _ = Describe("category classification and model selection", func() {
 			Entry("should return nil for non-existent category", row{query: "non-existent", want: nil}),
 		)
 
-		type scoreRow struct {
-			loadAware     bool
-			model         string
-			modelScore    float64
-			expectedScore float64
-			expectedQual  float64
-		}
-
-		DescribeTable("calculate model score",
-			func(r scoreRow) {
-				classifier.ModelTTFT = map[string]float64{"model-a": 2.0}
-				classifier.ModelLoad = map[string]int{"model-a": 10}
-				classifier.Config.Classifier.LoadAware = r.loadAware
-				modelScore := config.ModelScore{Model: r.model, Score: r.modelScore}
-
-				score, quality := classifier.calculateModelScore(modelScore)
-
-				Expect(score).To(BeNumerically("~", r.expectedScore, 0.00001), "score should match expected")
-				Expect(quality).To(BeNumerically("~", r.expectedQual, 0.00001), "quality should match expected")
-			},
-			Entry("load aware disabled - returns quality as both score and quality",
-				scoreRow{
-					loadAware:     false,
-					model:         "model-a",
-					modelScore:    0.9,
-					expectedScore: 0.9,
-					expectedQual:  0.9,
-				}),
-			Entry("load aware enabled - calculates score with load and TTFT",
-				scoreRow{
-					loadAware:     true,
-					model:         "model-a",
-					modelScore:    0.9,
-					expectedScore: 0.0409, // 0.9 / (2.0 * (1 + 10)) = 0.9 / 22.0
-					expectedQual:  0.9,
-				}),
-			Entry("load aware enabled - handles zero TTFT by using 1",
-				scoreRow{
-					loadAware:     true,
-					model:         "model-unknown",
-					modelScore:    0.8,
-					expectedScore: 0.8, // 0.8 / 1 (zero TTFT becomes 1)
-					expectedQual:  0.8,
-				}),
-		)
-
 		Describe("select best model internal", func() {
-			BeforeEach(func() {
-				classifier.Config.Classifier.LoadAware = false
-				classifier.ModelLoad = make(map[string]int)
-			})
 
 			It("should select best model without filter", func() {
 				cat := &config.Category{
diff --git a/src/training/model_eval/result_to_config.py b/src/training/model_eval/result_to_config.py
@@ -117,7 +117,6 @@ def generate_config_yaml(category_accuracies, similarity_threshold):
                 "use_cpu": True,
                 "pii_mapping_path": "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json",
             },
-            "load_aware": False,
         },
         "categories": [],
         "default_reasoning_effort": "medium",  # Default reasoning effort level (low, medium, high)

Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,6 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response`
`52`	`52`	`float64(completionTokens),`
`53`	`53`	`)`
`54`	`54`	`metrics.RecordModelCompletionLatency(ctx.RequestModel, completionLatency.Seconds())`
`55`		`- r.Classifier.DecrementModelLoad(ctx.RequestModel)`
`56`	`55`
`57`	`56`	`// Compute and record cost if pricing is configured`
`58`	`57`	`if r.Config != nil {`