
Commit b74bca4

Merge branch 'main' into fix_docker_compose
2 parents: a5fd028 + 464ed6c

13 files changed: +237 -326 lines


config/config.yaml

Lines changed: 0 additions & 13 deletions
@@ -20,10 +20,6 @@ prompt_guard:
   threshold: 0.7
   use_cpu: true
   jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
-gpu_config:
-  flops: 312000000000000 # 312e12 fp16
-  hbm: 2000000000000 # 2e12 (2 TB/s)
-  description: "A100-80G" # https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf

 # vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
 vllm_endpoints:
@@ -52,9 +48,6 @@ vllm_endpoints:

 model_config:
   phi4:
-    param_count: 14000000000 # 14B parameters https://huggingface.co/microsoft/phi-4
-    batch_size: 512.0 # vLLM default batch size
-    context_size: 16384.0 # based on https://huggingface.co/microsoft/phi-4
     pricing:
       currency: USD
       prompt_per_1m: 0.07
@@ -65,9 +58,6 @@ model_config:
     # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
     preferred_endpoints: ["endpoint1", "endpoint3"]
   gemma3:27b:
-    param_count: 27000000000 # 27B parameters (base version)
-    batch_size: 512.0
-    context_size: 16384.0
     pricing:
       currency: USD
       prompt_per_1m: 0.067
@@ -77,9 +67,6 @@ model_config:
     pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
     preferred_endpoints: ["endpoint1"]
   "mistral-small3.1":
-    param_count: 22000000000
-    batch_size: 512.0
-    context_size: 16384.0
     pricing:
       currency: USD
       prompt_per_1m: 0.1

deploy/kubernetes/config.yaml

Lines changed: 0 additions & 13 deletions
@@ -20,10 +20,6 @@ prompt_guard:
   threshold: 0.7
   use_cpu: true
   jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
-gpu_config:
-  flops: 312000000000000 # 312e12 fp16
-  hbm: 2000000000000 # 2e12 (2 TB/s)
-  description: "A100-80G" # https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf

 # vLLM Endpoints Configuration - supports multiple endpoints, each can serve multiple models
 vllm_endpoints:
@@ -52,26 +48,17 @@ vllm_endpoints:

 model_config:
   phi4:
-    param_count: 14000000000 # 14B parameters https://huggingface.co/microsoft/phi-4
-    batch_size: 512.0 # vLLM default batch size
-    context_size: 16384.0 # based on https://huggingface.co/microsoft/phi-4
     pii_policy:
       allow_by_default: false # Deny all PII by default
       pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
     # Specify which endpoints can serve this model (optional - if not specified, uses all endpoints that list this model)
     preferred_endpoints: ["endpoint1", "endpoint3"]
   gemma3:27b:
-    param_count: 27000000000 # 27B parameters (base version)
-    batch_size: 512.0
-    context_size: 16384.0
     pii_policy:
       allow_by_default: false # Deny all PII by default
       pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
     preferred_endpoints: ["endpoint1"]
   "mistral-small3.1":
-    param_count: 22000000000
-    batch_size: 512.0
-    context_size: 16384.0
     pii_policy:
       allow_by_default: false # Deny all PII by default
       pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types

src/semantic-router/go.mod

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ require (
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
 	github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
+	github.com/fsnotify/fsnotify v1.7.0 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/google/go-cmp v0.7.0 // indirect

src/semantic-router/go.sum

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,8 @@ github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8k
 github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw=
 github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
 github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
+github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
+github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
 github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
 github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
 github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=

src/semantic-router/pkg/config/config.go

Lines changed: 38 additions & 62 deletions
@@ -3,6 +3,7 @@ package config
 import (
 	"fmt"
 	"os"
+	"path/filepath"
 	"sync"

 	"gopkg.in/yaml.v3"
@@ -53,9 +54,6 @@ type RouterConfig struct {
 	// Model parameters configuration
 	ModelConfig map[string]ModelParams `yaml:"model_config"`

-	// GPU configuration for TTFT calculation
-	GPUConfig GPUConfig `yaml:"gpu_config"`
-
 	// Tools configuration for automatic tool selection
 	Tools ToolsConfig `yaml:"tools"`

@@ -191,7 +189,7 @@ type VLLMEndpoint struct {
 	HealthCheckPath string `yaml:"health_check_path,omitempty"`
 }

-// ModelParams represents configuration for model-specific parameters
+// ModelPricing represents configuration for model-specific parameters
 type ModelPricing struct {
 	// ISO currency code for the pricing (e.g., "USD"). Defaults to "USD" when omitted.
 	Currency string `yaml:"currency,omitempty"`
@@ -202,15 +200,6 @@ type ModelPricing struct {
 }

 type ModelParams struct {
-	// Number of parameters in the model
-	ParamCount float64 `yaml:"param_count"`
-
-	// Default batch size for this model
-	BatchSize float64 `yaml:"batch_size"`
-
-	// Default context size for this model
-	ContextSize float64 `yaml:"context_size"`
-
 	// PII policy configuration for this model
 	PIIPolicy PIIPolicy `yaml:"pii_policy,omitempty"`

@@ -252,18 +241,6 @@ const (
 	PIITypeZipCode = "ZIP_CODE" // ZIP/Postal codes
 )

-// GPUConfig represents configuration for GPU parameters used in TTFT calculation
-type GPUConfig struct {
-	// FLOPs performance in operations per second
-	FLOPS float64 `yaml:"flops"`
-
-	// HBM memory bandwidth in bytes per second
-	HBM float64 `yaml:"hbm"`
-
-	// Description of the GPU configuration (e.g., "A100-80G")
-	Description string `yaml:"description"`
-}
-
 // GetCacheSimilarityThreshold returns the effective threshold for the semantic cache
 func (c *RouterConfig) GetCacheSimilarityThreshold() float32 {
 	if c.SemanticCache.SimilarityThreshold != nil {
@@ -291,30 +268,56 @@ var (
 	config *RouterConfig
 	configOnce sync.Once
 	configErr error
+	configMu sync.RWMutex
 )

-// LoadConfig loads the configuration from the specified YAML file
+// LoadConfig loads the configuration from the specified YAML file once and caches it globally.
 func LoadConfig(configPath string) (*RouterConfig, error) {
 	configOnce.Do(func() {
-		data, err := os.ReadFile(configPath)
+		cfg, err := ParseConfigFile(configPath)
 		if err != nil {
-			configErr = fmt.Errorf("failed to read config file: %w", err)
-			return
-		}
-
-		config = &RouterConfig{}
-		if err := yaml.Unmarshal(data, config); err != nil {
-			configErr = fmt.Errorf("failed to parse config file: %w", err)
+			configErr = err
 			return
 		}
+		configMu.Lock()
+		config = cfg
+		configMu.Unlock()
 	})
-
 	if configErr != nil {
 		return nil, configErr
 	}
+	configMu.RLock()
+	defer configMu.RUnlock()
 	return config, nil
 }

+// ParseConfigFile parses the YAML config file without touching the global cache.
+func ParseConfigFile(configPath string) (*RouterConfig, error) {
+	// Resolve symlinks to handle Kubernetes ConfigMap mounts
+	resolved, _ := filepath.EvalSymlinks(configPath)
+	if resolved == "" {
+		resolved = configPath
+	}
+	data, err := os.ReadFile(resolved)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read config file: %w", err)
+	}
+	cfg := &RouterConfig{}
+	if err := yaml.Unmarshal(data, cfg); err != nil {
+		return nil, fmt.Errorf("failed to parse config file: %w", err)
+	}
+	return cfg, nil
+}
+
+// ReplaceGlobalConfig replaces the globally cached config. It is safe for concurrent readers.
+func ReplaceGlobalConfig(newCfg *RouterConfig) {
+	configMu.Lock()
+	defer configMu.Unlock()
+	config = newCfg
+	// Do not reset configOnce to avoid racing re-parses via LoadConfig; callers should use ParseConfigFile for fresher reads.
+	configErr = nil
+}
+
 // GetConfig returns the current configuration
 func GetConfig() *RouterConfig {
 	return config
@@ -349,33 +352,6 @@ func (c *RouterConfig) GetModelForCategoryIndex(index int) string {
 	return c.DefaultModel
 }

-// GetModelParamCount returns the parameter count for a given model
-// If the model is not found in the config, returns the default value
-func (c *RouterConfig) GetModelParamCount(modelName string, defaultValue float64) float64 {
-	if modelConfig, ok := c.ModelConfig[modelName]; ok {
-		return modelConfig.ParamCount
-	}
-	return defaultValue
-}
-
-// GetModelBatchSize returns the batch size for a given model
-// If the model is not found in the config, returns the default value
-func (c *RouterConfig) GetModelBatchSize(modelName string, defaultValue float64) float64 {
-	if modelConfig, ok := c.ModelConfig[modelName]; ok {
-		return modelConfig.BatchSize
-	}
-	return defaultValue
-}
-
-// GetModelContextSize returns the context size for a given model
-// If the model is not found in the config, returns the default value
-func (c *RouterConfig) GetModelContextSize(modelName string, defaultValue float64) float64 {
-	if modelConfig, ok := c.ModelConfig[modelName]; ok {
-		return modelConfig.ContextSize
-	}
-	return defaultValue
-}
-
 // GetModelPricing returns pricing per 1M tokens and its currency for the given model.
 // The currency indicates the unit of the returned rates (e.g., "USD").
 func (c *RouterConfig) GetModelPricing(modelName string) (promptPer1M float64, completionPer1M float64, currency string, ok bool) {
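
Note: the new ParseConfigFile and ReplaceGlobalConfig functions, together with the fsnotify dependency added in go.mod, give callers a way to refresh the router configuration at runtime (for example when a Kubernetes ConfigMap mount is updated). The sketch below is illustrative only and is not part of this commit; the config import path is a placeholder assumption, and the watcher wiring shows one plausible use of the new API.

// Illustrative sketch (not from this commit): hot-reloading the config with
// fsnotify, ParseConfigFile, and ReplaceGlobalConfig.
package configwatch

import (
	"log"
	"path/filepath"

	"github.com/fsnotify/fsnotify"

	config "example.invalid/semantic-router/pkg/config" // hypothetical import path
)

// watchConfig re-parses and swaps in the config whenever the file changes.
func watchConfig(path string) error {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	defer watcher.Close()

	// Watch the parent directory: ConfigMap updates replace a symlink rather
	// than writing the file in place, so a watch on the file itself can be lost.
	if err := watcher.Add(filepath.Dir(path)); err != nil {
		return err
	}

	for {
		select {
		case ev, ok := <-watcher.Events:
			if !ok {
				return nil
			}
			if ev.Op&(fsnotify.Write|fsnotify.Create) == 0 {
				continue
			}
			cfg, err := config.ParseConfigFile(path) // fresh parse; bypasses the sync.Once cache
			if err != nil {
				log.Printf("config reload skipped: %v", err)
				continue
			}
			config.ReplaceGlobalConfig(cfg) // swap the cached config under configMu
		case werr, ok := <-watcher.Errors:
			if !ok {
				return nil
			}
			log.Printf("config watcher error: %v", werr)
		}
	}
}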
