@@ -3,6 +3,7 @@ package config
33import (
44 "fmt"
55 "os"
6+ "path/filepath"
67 "sync"
78
89 "gopkg.in/yaml.v3"
@@ -53,9 +54,6 @@ type RouterConfig struct {
5354 // Model parameters configuration
5455 ModelConfig map [string ]ModelParams `yaml:"model_config"`
5556
56- // GPU configuration for TTFT calculation
57- GPUConfig GPUConfig `yaml:"gpu_config"`
58-
5957 // Tools configuration for automatic tool selection
6058 Tools ToolsConfig `yaml:"tools"`
6159
@@ -191,7 +189,7 @@ type VLLMEndpoint struct {
191189 HealthCheckPath string `yaml:"health_check_path,omitempty"`
192190}
193191
194- // ModelParams represents configuration for model-specific parameters
192+ // ModelPricing represents the pricing configuration for a model
195193type ModelPricing struct {
196194 // ISO currency code for the pricing (e.g., "USD"). Defaults to "USD" when omitted.
197195 Currency string `yaml:"currency,omitempty"`
@@ -202,15 +200,6 @@ type ModelPricing struct {
202200}
203201
204202type ModelParams struct {
205- // Number of parameters in the model
206- ParamCount float64 `yaml:"param_count"`
207-
208- // Default batch size for this model
209- BatchSize float64 `yaml:"batch_size"`
210-
211- // Default context size for this model
212- ContextSize float64 `yaml:"context_size"`
213-
214203 // PII policy configuration for this model
215204 PIIPolicy PIIPolicy `yaml:"pii_policy,omitempty"`
216205
@@ -252,18 +241,6 @@ const (
252241 PIITypeZipCode = "ZIP_CODE" // ZIP/Postal codes
253242)
254243
255- // GPUConfig represents configuration for GPU parameters used in TTFT calculation
256- type GPUConfig struct {
257- // FLOPs performance in operations per second
258- FLOPS float64 `yaml:"flops"`
259-
260- // HBM memory bandwidth in bytes per second
261- HBM float64 `yaml:"hbm"`
262-
263- // Description of the GPU configuration (e.g., "A100-80G")
264- Description string `yaml:"description"`
265- }
266-
267244// GetCacheSimilarityThreshold returns the effective threshold for the semantic cache
268245func (c * RouterConfig ) GetCacheSimilarityThreshold () float32 {
269246 if c .SemanticCache .SimilarityThreshold != nil {
@@ -291,30 +268,56 @@ var (
291268 config * RouterConfig
292269 configOnce sync.Once
293270 configErr error
271+ configMu sync.RWMutex
294272)
295273
296- // LoadConfig loads the configuration from the specified YAML file
274+ // LoadConfig loads the configuration from the specified YAML file once and caches it globally.
297275func LoadConfig (configPath string ) (* RouterConfig , error ) {
298276 configOnce .Do (func () {
299- data , err := os . ReadFile (configPath )
277+ cfg , err := ParseConfigFile (configPath )
300278 if err != nil {
301- configErr = fmt .Errorf ("failed to read config file: %w" , err )
302- return
303- }
304-
305- config = & RouterConfig {}
306- if err := yaml .Unmarshal (data , config ); err != nil {
307- configErr = fmt .Errorf ("failed to parse config file: %w" , err )
279+ configErr = err
308280 return
309281 }
282+ configMu .Lock ()
283+ config = cfg
284+ configMu .Unlock ()
310285 })
311-
312286 if configErr != nil {
313287 return nil , configErr
314288 }
289+ configMu .RLock ()
290+ defer configMu .RUnlock ()
315291 return config , nil
316292}
317293
294+ // ParseConfigFile parses the YAML config file without touching the global cache.
295+ func ParseConfigFile (configPath string ) (* RouterConfig , error ) {
296+ // Resolve symlinks to handle Kubernetes ConfigMap mounts
297+ resolved , _ := filepath .EvalSymlinks (configPath )
298+ if resolved == "" {
299+ resolved = configPath
300+ }
301+ data , err := os .ReadFile (resolved )
302+ if err != nil {
303+ return nil , fmt .Errorf ("failed to read config file: %w" , err )
304+ }
305+ cfg := & RouterConfig {}
306+ if err := yaml .Unmarshal (data , cfg ); err != nil {
307+ return nil , fmt .Errorf ("failed to parse config file: %w" , err )
308+ }
309+ return cfg , nil
310+ }
311+
312+ // ReplaceGlobalConfig replaces the globally cached config. It is safe for concurrent readers.
313+ func ReplaceGlobalConfig (newCfg * RouterConfig ) {
314+ configMu .Lock ()
315+ defer configMu .Unlock ()
316+ config = newCfg
317+ // Do not reset configOnce to avoid racing re-parses via LoadConfig; callers should use ParseConfigFile for fresher reads.
318+ configErr = nil
319+ }
320+
318321// GetConfig returns the current configuration
319322func GetConfig () * RouterConfig {
320323 return config
@@ -349,33 +352,6 @@ func (c *RouterConfig) GetModelForCategoryIndex(index int) string {
349352 return c .DefaultModel
350353}
351354
352- // GetModelParamCount returns the parameter count for a given model
353- // If the model is not found in the config, returns the default value
354- func (c * RouterConfig ) GetModelParamCount (modelName string , defaultValue float64 ) float64 {
355- if modelConfig , ok := c .ModelConfig [modelName ]; ok {
356- return modelConfig .ParamCount
357- }
358- return defaultValue
359- }
360-
361- // GetModelBatchSize returns the batch size for a given model
362- // If the model is not found in the config, returns the default value
363- func (c * RouterConfig ) GetModelBatchSize (modelName string , defaultValue float64 ) float64 {
364- if modelConfig , ok := c .ModelConfig [modelName ]; ok {
365- return modelConfig .BatchSize
366- }
367- return defaultValue
368- }
369-
370- // GetModelContextSize returns the context size for a given model
371- // If the model is not found in the config, returns the default value
372- func (c * RouterConfig ) GetModelContextSize (modelName string , defaultValue float64 ) float64 {
373- if modelConfig , ok := c .ModelConfig [modelName ]; ok {
374- return modelConfig .ContextSize
375- }
376- return defaultValue
377- }
378-
379355// GetModelPricing returns pricing per 1M tokens and its currency for the given model.
380356// The currency indicates the unit of the returned rates (e.g., "USD").
381357func (c * RouterConfig ) GetModelPricing (modelName string ) (promptPer1M float64 , completionPer1M float64 , currency string , ok bool ) {
0 commit comments