@@ -81,6 +81,9 @@ type RouterOptions struct {
8181
8282 // Gateway route cache clearing
8383 ClearRouteCache bool `yaml:"clear_route_cache"`
84+
85+ // Ensemble configuration for multi-model inference
86+ Ensemble EnsembleConfig `yaml:"ensemble,omitempty"`
8487}
8588
8689// InlineModels represents the configuration for models that are built into the binary
@@ -812,3 +815,26 @@ type PIIDetectionPolicy struct {
812815 // If nil, uses the global threshold from Classifier.PIIModel.Threshold
813816 PIIThreshold * float32 `yaml:"pii_threshold,omitempty"`
814817}
818+
// EnsembleConfig represents configuration for ensemble orchestration, i.e.
// multi-model inference where several model responses are aggregated.
//
// All fields are plain YAML-mapped values; this type performs no validation.
// NOTE(review): how zero values (empty strategy, 0 timeout, 0 concurrency,
// nil mappings) are interpreted is decided by the consumer of this config and
// is not visible from this declaration — confirm before relying on defaults.
type EnsembleConfig struct {
	// Enabled controls whether ensemble mode is available.
	// Unlike the other fields it is always emitted when marshaling (no omitempty).
	Enabled bool `yaml:"enabled"`

	// DefaultStrategy is the default aggregation strategy.
	// Values: "voting", "weighted", "first_success", "score_averaging", "reranking".
	DefaultStrategy string `yaml:"default_strategy,omitempty"`

	// DefaultMinResponses is the default minimum number of responses required.
	DefaultMinResponses int `yaml:"default_min_responses,omitempty"`

	// TimeoutSeconds is the maximum time, in seconds, to wait for model responses.
	TimeoutSeconds int `yaml:"timeout_seconds,omitempty"`

	// MaxConcurrentRequests limits parallel model queries.
	MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`

	// EndpointMappings maps model names to their OpenAI-compatible API endpoints.
	// Example: {"model-a": "http://localhost:8001/v1/chat/completions"}
	EndpointMappings map[string]string `yaml:"endpoint_mappings,omitempty"`
}
0 commit comments