
Commit 736bd4d

Add ensemble factory and integrate with extproc

Copilot and rootfs committed
Co-authored-by: rootfs <[email protected]>
1 parent 618b286 · commit 736bd4d

10 files changed: +710 −0 lines changed

config/config.yaml — 13 additions, 0 deletions

@@ -504,6 +504,19 @@ embedding_models:
   gemma_model_path: "models/embeddinggemma-300m"
   use_cpu: true # Set to false for GPU acceleration (requires CUDA)
 
+# Ensemble Configuration
+# Enables multi-model inference with configurable aggregation strategies
+ensemble:
+  enabled: false # Enable ensemble mode (disabled by default)
+  default_strategy: "voting" # voting, weighted, first_success, score_averaging, reranking
+  default_min_responses: 2 # Minimum number of successful responses required
+  timeout_seconds: 30 # Maximum time to wait for model responses
+  max_concurrent_requests: 10 # Limit parallel model queries
+  endpoint_mappings: # Map model names to OpenAI-compatible API endpoints
+    # Example:
+    # model-a: "http://localhost:8001/v1/chat/completions"
+    # model-b: "http://localhost:8002/v1/chat/completions"
+
 # Observability Configuration
 observability:
   tracing:
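
For orientation, here is a minimal sketch of how the new ensemble block could be parsed, assuming gopkg.in/yaml.v3 and an inline mirror struct. The endpoint URLs are the ones suggested in the commented example above; none of this code is part of the commit (the actual struct is added in config.go below).

package main

import (
	"fmt"
	"log"

	"gopkg.in/yaml.v3"
)

// sample mirrors the new ensemble section, with the example endpoints filled in.
const sample = `
ensemble:
  enabled: true
  default_strategy: "voting"
  default_min_responses: 2
  timeout_seconds: 30
  max_concurrent_requests: 10
  endpoint_mappings:
    model-a: "http://localhost:8001/v1/chat/completions"
    model-b: "http://localhost:8002/v1/chat/completions"
`

func main() {
	// An inline struct standing in for the real EnsembleConfig (see config.go below).
	var cfg struct {
		Ensemble struct {
			Enabled               bool              `yaml:"enabled"`
			DefaultStrategy       string            `yaml:"default_strategy"`
			DefaultMinResponses   int               `yaml:"default_min_responses"`
			TimeoutSeconds        int               `yaml:"timeout_seconds"`
			MaxConcurrentRequests int               `yaml:"max_concurrent_requests"`
			EndpointMappings      map[string]string `yaml:"endpoint_mappings"`
		} `yaml:"ensemble"`
	}
	if err := yaml.Unmarshal([]byte(sample), &cfg); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("ensemble enabled=%v strategy=%s endpoints=%d\n",
		cfg.Ensemble.Enabled, cfg.Ensemble.DefaultStrategy, len(cfg.Ensemble.EndpointMappings))
}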

src/semantic-router/pkg/config/config.go — 26 additions, 0 deletions

@@ -81,6 +81,9 @@ type RouterOptions struct {
 
 	// Gateway route cache clearing
 	ClearRouteCache bool `yaml:"clear_route_cache"`
+
+	// Ensemble configuration for multi-model inference
+	Ensemble EnsembleConfig `yaml:"ensemble,omitempty"`
 }
 
 // InlineModels represents the configuration for models that are built into the binary

@@ -812,3 +815,26 @@ type PIIDetectionPolicy struct {
 	// If nil, uses the global threshold from Classifier.PIIModel.Threshold
 	PIIThreshold *float32 `yaml:"pii_threshold,omitempty"`
 }
+
+// EnsembleConfig represents configuration for ensemble orchestration
+type EnsembleConfig struct {
+	// Enabled controls whether ensemble mode is available
+	Enabled bool `yaml:"enabled"`
+
+	// DefaultStrategy is the default aggregation strategy
+	// Values: "voting", "weighted", "first_success", "score_averaging", "reranking"
+	DefaultStrategy string `yaml:"default_strategy,omitempty"`
+
+	// DefaultMinResponses is the default minimum number of responses required
+	DefaultMinResponses int `yaml:"default_min_responses,omitempty"`
+
+	// TimeoutSeconds is the maximum time to wait for model responses
+	TimeoutSeconds int `yaml:"timeout_seconds,omitempty"`
+
+	// MaxConcurrentRequests limits parallel model queries
+	MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"`
+
+	// EndpointMappings maps model names to their OpenAI-compatible API endpoints
+	// Example: {"model-a": "http://localhost:8001/v1/chat/completions"}
+	EndpointMappings map[string]string `yaml:"endpoint_mappings,omitempty"`
+}
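
To show how these fields might drive the extproc side of this change, a hedged sketch follows. The fanOut helper and the postJSON callback are hypothetical names, not the commit's actual ensemble factory; the fallback values simply mirror the sample config above, and the function is assumed to sit alongside EnsembleConfig in the config package for brevity.

package config

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// EnsembleResult holds one model's raw response (or its error).
type EnsembleResult struct {
	Model string
	Body  string
	Err   error
}

// fanOut queries every endpoint in EndpointMappings in parallel, bounded by
// MaxConcurrentRequests and TimeoutSeconds, and fails when fewer than
// DefaultMinResponses calls succeed. Aggregation (voting, reranking, ...) is out of scope here.
func fanOut(cfg EnsembleConfig, postJSON func(ctx context.Context, url string) (string, error)) ([]EnsembleResult, error) {
	timeout := time.Duration(cfg.TimeoutSeconds) * time.Second
	if timeout <= 0 {
		timeout = 30 * time.Second // assumed fallback, mirroring the sample config
	}
	limit := cfg.MaxConcurrentRequests
	if limit <= 0 {
		limit = len(cfg.EndpointMappings)
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	sem := make(chan struct{}, limit) // limit parallel model queries
	results := make([]EnsembleResult, 0, len(cfg.EndpointMappings))
	var mu sync.Mutex
	var wg sync.WaitGroup

	for model, url := range cfg.EndpointMappings {
		wg.Add(1)
		go func(model, url string) {
			defer wg.Done()
			sem <- struct{}{}
			defer func() { <-sem }()
			body, err := postJSON(ctx, url)
			mu.Lock()
			results = append(results, EnsembleResult{Model: model, Body: body, Err: err})
			mu.Unlock()
		}(model, url)
	}
	wg.Wait()

	ok := 0
	for _, r := range results {
		if r.Err == nil {
			ok++
		}
	}
	if ok < cfg.DefaultMinResponses {
		return nil, fmt.Errorf("ensemble: only %d of the required %d responses succeeded", ok, cfg.DefaultMinResponses)
	}
	return results, nil
}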
