From 4e286e567be9ab6611c83f8783a7c0adbd1a3bc9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 22 Sep 2025 22:21:20 +0000 Subject: [PATCH 1/2] Initial plan From b4c206f04bba01edddf4a5b0b828673c6605b0ac Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 22 Sep 2025 22:40:10 +0000 Subject: [PATCH 2/2] Implement comprehensive multi-cloud and hybrid cloud routing support Co-authored-by: wangchen615 <39573014+wangchen615@users.noreply.github.com> --- config/multi-cloud-config-example.yaml | 351 +++++++++++++++++ src/semantic-router/pkg/config/config.go | 366 ++++++++++++++++++ .../pkg/extproc/request_handler.go | 97 ++++- src/semantic-router/pkg/extproc/router.go | 11 + .../pkg/intercluster/router.go | 351 +++++++++++++++++ .../pkg/intercluster/router_test.go | 293 ++++++++++++++ .../getting-started/multi-cloud-routing.md | 205 ++++++++++ 7 files changed, 1661 insertions(+), 13 deletions(-) create mode 100644 config/multi-cloud-config-example.yaml create mode 100644 src/semantic-router/pkg/intercluster/router.go create mode 100644 src/semantic-router/pkg/intercluster/router_test.go create mode 100644 website/docs/getting-started/multi-cloud-routing.md diff --git a/config/multi-cloud-config-example.yaml b/config/multi-cloud-config-example.yaml new file mode 100644 index 00000000..078b2e8f --- /dev/null +++ b/config/multi-cloud-config-example.yaml @@ -0,0 +1,351 @@ +# Multi-Cloud Semantic Router Configuration Example +# This configuration demonstrates inter-cluster and hybrid cloud routing capabilities + +bert_model: + model_id: sentence-transformers/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + +semantic_cache: + enabled: true + backend_type: "memory" + similarity_threshold: 0.8 + max_entries: 1000 + ttl_seconds: 3600 + eviction_policy: "fifo" + +tools: + enabled: true + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + +prompt_guard: + enabled: true + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" + +# Local vLLM Endpoints (legacy single-cluster support) +vllm_endpoints: + - name: "local-endpoint" + address: "127.0.0.1" + port: 8000 + models: + - "llama-2-7b" + weight: 1 + health_check_path: "/health" + +# Model Configuration +model_config: + "llama-2-70b": + reasoning_family: "llama" + pii_policy: + allow_by_default: true + "gpt-4": + reasoning_family: "gpt" + pii_policy: + allow_by_default: false + pii_types_allowed: ["EMAIL_ADDRESS", "PERSON"] + "claude-3": + reasoning_family: "claude" + pii_policy: + allow_by_default: true + +# Inter-Cluster and Multi-Cloud Routing Configuration +inter_cluster_routing: + enabled: true + + # Cluster Discovery Configuration + cluster_discovery: + method: "static" # Options: "static", "kubernetes", "consul", "etcd" + refresh_interval: "30s" + health_check_interval: "10s" + + # Static cluster definitions + static_clusters: + - name: "on-prem-gpu-cluster" + location: "us-west-2" + type: "vllm" + endpoint: "https://on-prem.company.com:8000" + authentication: + type: "bearer" + token: "bearer-token-secret" + models: + - "llama-2-70b" + - "codellama-34b" + - "mistral-7b" + capabilities: + max_context_length: 4096 + max_tokens_per_second: 100 + performance: + avg_latency_ms: 150 + throughput_rps: 50 + 
availability: 99.5 + compliance: + - "hipaa" + - "sox" + cost_per_token: 0.001 + health_check: + path: "/health" + interval: "15s" + timeout: "5s" + unhealthy_threshold: 3 + healthy_threshold: 2 + + - name: "eu-west-cluster" + location: "eu-west-1" + type: "vllm" + endpoint: "https://eu-cluster.company.com:8000" + authentication: + type: "bearer" + token: "eu-bearer-token" + models: + - "llama-2-70b" + - "mistral-7b" + capabilities: + max_context_length: 4096 + max_tokens_per_second: 80 + performance: + avg_latency_ms: 200 + throughput_rps: 40 + availability: 99.9 + compliance: + - "gdpr" + - "iso27001" + cost_per_token: 0.0015 + health_check: + path: "/health" + interval: "15s" + timeout: "5s" + + - name: "code-specialized-cluster" + location: "us-east-1" + type: "vllm" + endpoint: "https://code-cluster.company.com:8000" + authentication: + type: "api_key" + key: "api-key-secret" + models: + - "codellama-34b" + - "gpt-4-code" + capabilities: + max_context_length: 8192 + max_tokens_per_second: 120 + performance: + avg_latency_ms: 100 + throughput_rps: 60 + availability: 99.8 + cost_per_token: 0.002 + + # Cloud Provider Configurations + providers: + - name: "openai-cloud" + type: "openai" + endpoint: "https://api.openai.com/v1" + authentication: + type: "api_key" + key: "sk-your-openai-api-key" + models: + - "gpt-4" + - "gpt-3.5-turbo" + - "gpt-4-turbo" + capabilities: + max_context_length: 8192 + max_tokens_per_second: 200 + performance: + avg_latency_ms: 300 + throughput_rps: 100 + availability: 99.9 + rate_limit: + requests_per_minute: 500 + tokens_per_minute: 90000 + burst_allowance: 50 + + - name: "anthropic-claude" + type: "claude" + endpoint: "https://api.anthropic.com/v1" + authentication: + type: "api_key" + key: "claude-api-key" + models: + - "claude-3" + - "claude-3-sonnet" + - "claude-3-haiku" + capabilities: + max_context_length: 200000 + max_tokens_per_second: 150 + performance: + avg_latency_ms: 400 + throughput_rps: 80 + availability: 99.8 + rate_limit: + requests_per_minute: 300 + tokens_per_minute: 50000 + + - name: "grok-provider" + type: "grok" + endpoint: "https://api.x.ai/v1" + authentication: + type: "api_key" + key: "grok-api-key" + models: + - "grok-1" + - "grok-1.5" + capabilities: + max_context_length: 128000 + max_tokens_per_second: 100 + performance: + avg_latency_ms: 500 + throughput_rps: 60 + availability: 99.5 + rate_limit: + requests_per_minute: 200 + tokens_per_minute: 40000 + + # Routing Strategies (applied in priority order - higher number = higher priority) + routing_strategies: + # Highest Priority: Compliance-based routing for GDPR requirements + - name: "gdpr-compliance-routing" + priority: 300 + conditions: + - type: "compliance_requirement" + required_compliance: ["gdpr"] + actions: + - type: "route_to_cluster" + target: "eu-west-cluster" + + # High Priority: Code generation routing + - name: "code-generation-routing" + priority: 250 + conditions: + - type: "model_requirement" + required_model: "codellama-34b" + actions: + - type: "route_to_cluster" + target: "code-specialized-cluster" + + # Medium Priority: Latency-optimized routing + - name: "latency-optimized-routing" + priority: 200 + conditions: + - type: "latency_requirement" + max_latency_ms: 200 + actions: + - type: "route_to_cluster" + target: "code-specialized-cluster" + - type: "failover" + failover_targets: ["on-prem-gpu-cluster", "eu-west-cluster"] + + # Medium Priority: Cost-sensitive routing + - name: "cost-optimized-routing" + priority: 150 + conditions: + - type: "cost_sensitivity" 
+ max_cost_per_1k_tokens: 0.0015 + actions: + - type: "route_to_cluster" + target: "on-prem-gpu-cluster" + - type: "failover" + failover_targets: ["eu-west-cluster"] + + # Low Priority: Load balancing for general queries + - name: "load-balanced-routing" + priority: 100 + conditions: [] # No specific conditions - applies to all requests + actions: + - type: "load_balance" + load_balance_strategy: "round_robin" + + # Fault Tolerance Configuration + fault_tolerance: + circuit_breaker: + failure_threshold: 5 + timeout: "30s" + max_requests: 10 + retry_policy: + max_retries: 3 + backoff_multiplier: 2.0 + max_backoff: "10s" + retry_on_errors: ["timeout", "connection_error", "server_error"] + fallback_strategy: "next_best_cluster" + default_fallback_cluster: "on-prem-gpu-cluster" + +# Classifier configuration +classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.7 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + pii_model: + model_id: "models/pii_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_model/pii_type_mapping.json" + +# Categories for routing queries +categories: + - name: "math" + description: "Mathematical calculations and problem solving" + model_scores: + - model: "llama-2-70b" + score: 0.9 + use_reasoning: true + - model: "gpt-4" + score: 0.85 + use_reasoning: true + + - name: "creative" + description: "Creative writing, storytelling, and artistic content" + model_scores: + - model: "claude-3" + score: 0.95 + use_reasoning: false + - model: "gpt-4" + score: 0.8 + use_reasoning: false + + - name: "code_generation" + description: "Programming, code generation, and software development" + reasoning_description: "Code generation with step-by-step reasoning" + reasoning_effort: "high" + model_scores: + - model: "codellama-34b" + score: 0.95 + use_reasoning: true + - model: "gpt-4-code" + score: 0.9 + use_reasoning: true + + - name: "general" + description: "General purpose queries and conversations" + model_scores: + - model: "llama-2-70b" + score: 0.8 + use_reasoning: false + - model: "gpt-3.5-turbo" + score: 0.75 + use_reasoning: false + +# Default model to use if no match is found +default_model: "llama-2-7b" + +# Default reasoning effort level +default_reasoning_effort: "medium" + +# Reasoning family configurations +reasoning_families: + llama: + type: "chat_template_kwargs" + parameter: "thinking" + gpt: + type: "reasoning_effort" + parameter: "reasoning_effort" + claude: + type: "chat_template_kwargs" + parameter: "enable_thinking" \ No newline at end of file diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go index cf4aafa9..b2653d89 100644 --- a/src/semantic-router/pkg/config/config.go +++ b/src/semantic-router/pkg/config/config.go @@ -85,6 +85,9 @@ type RouterConfig struct { // vLLM endpoints configuration for multiple backend support VLLMEndpoints []VLLMEndpoint `yaml:"vllm_endpoints"` + // Inter-cluster routing configuration + InterClusterRouting InterClusterConfig `yaml:"inter_cluster_routing,omitempty"` + // API configuration for classification endpoints API APIConfig `yaml:"api"` } @@ -251,6 +254,272 @@ const ( PIITypeZipCode = "ZIP_CODE" // ZIP/Postal codes ) +// InterClusterConfig represents the inter-cluster and multi-cloud routing configuration +type InterClusterConfig struct { + // Enable inter-cluster 
routing + Enabled bool `yaml:"enabled"` + + // Cluster discovery configuration + ClusterDiscovery ClusterDiscoveryConfig `yaml:"cluster_discovery,omitempty"` + + // Provider configurations + Providers []ProviderConfig `yaml:"providers,omitempty"` + + // Routing strategies + RoutingStrategies []RoutingStrategy `yaml:"routing_strategies,omitempty"` + + // Fault tolerance configuration + FaultTolerance FaultToleranceConfig `yaml:"fault_tolerance,omitempty"` +} + +// ClusterDiscoveryConfig represents cluster discovery and management settings +type ClusterDiscoveryConfig struct { + // Discovery method: "static", "kubernetes", "consul", "etcd" + Method string `yaml:"method"` + + // Refresh interval for dynamic discovery + RefreshInterval string `yaml:"refresh_interval,omitempty"` + + // Health check interval + HealthCheckInterval string `yaml:"health_check_interval,omitempty"` + + // Static cluster definitions (used when method is "static") + StaticClusters []ClusterConfig `yaml:"static_clusters,omitempty"` +} + +// ClusterConfig represents a cluster configuration +type ClusterConfig struct { + // Cluster name identifier + Name string `yaml:"name"` + + // Cluster location/region + Location string `yaml:"location"` + + // Cluster type: "vllm", "openai", "claude", "grok", "custom" + Type string `yaml:"type"` + + // Endpoint address + Endpoint string `yaml:"endpoint"` + + // Authentication configuration + Authentication AuthConfig `yaml:"authentication,omitempty"` + + // Available models in this cluster + Models []string `yaml:"models"` + + // Cluster capabilities + Capabilities ClusterCapabilities `yaml:"capabilities,omitempty"` + + // Performance characteristics + Performance PerformanceMetrics `yaml:"performance,omitempty"` + + // Compliance and data residency + Compliance []string `yaml:"compliance,omitempty"` + + // Cost per token (for cost-based routing) + CostPerToken float64 `yaml:"cost_per_token,omitempty"` + + // Health check configuration + HealthCheck HealthCheckConfig `yaml:"health_check,omitempty"` +} + +// ProviderConfig represents a cloud provider configuration +type ProviderConfig struct { + // Provider name + Name string `yaml:"name"` + + // Provider type: "vllm", "openai", "claude", "grok", "custom" + Type string `yaml:"type"` + + // Endpoint URL + Endpoint string `yaml:"endpoint"` + + // Authentication configuration + Authentication AuthConfig `yaml:"authentication"` + + // Available models + Models []string `yaml:"models"` + + // Provider-specific capabilities + Capabilities ClusterCapabilities `yaml:"capabilities,omitempty"` + + // Performance characteristics + Performance PerformanceMetrics `yaml:"performance,omitempty"` + + // Rate limiting configuration + RateLimit RateLimitConfig `yaml:"rate_limit,omitempty"` +} + +// AuthConfig represents authentication configuration for providers/clusters +type AuthConfig struct { + // Authentication type: "bearer", "api_key", "oauth", "none" + Type string `yaml:"type"` + + // Bearer token + Token string `yaml:"token,omitempty"` + + // API key + Key string `yaml:"key,omitempty"` + + // OAuth configuration + OAuth OAuthConfig `yaml:"oauth,omitempty"` +} + +// OAuthConfig represents OAuth authentication configuration +type OAuthConfig struct { + ClientID string `yaml:"client_id"` + ClientSecret string `yaml:"client_secret"` + TokenURL string `yaml:"token_url"` + Scopes []string `yaml:"scopes,omitempty"` +} + +// ClusterCapabilities represents cluster/provider capabilities +type ClusterCapabilities struct { + // Maximum context length + 
MaxContextLength int `yaml:"max_context_length,omitempty"` + + // Maximum tokens per second + MaxTokensPerSecond int `yaml:"max_tokens_per_second,omitempty"` + + // Supported features + Features []string `yaml:"features,omitempty"` +} + +// PerformanceMetrics represents performance characteristics +type PerformanceMetrics struct { + // Average latency in milliseconds + AvgLatencyMs int `yaml:"avg_latency_ms,omitempty"` + + // Throughput in requests per second + ThroughputRPS int `yaml:"throughput_rps,omitempty"` + + // Availability percentage + Availability float64 `yaml:"availability,omitempty"` +} + +// RateLimitConfig represents rate limiting configuration +type RateLimitConfig struct { + // Requests per minute + RequestsPerMinute int `yaml:"requests_per_minute,omitempty"` + + // Tokens per minute + TokensPerMinute int `yaml:"tokens_per_minute,omitempty"` + + // Burst allowance + BurstAllowance int `yaml:"burst_allowance,omitempty"` +} + +// RoutingStrategy represents a routing strategy configuration +type RoutingStrategy struct { + // Strategy name + Name string `yaml:"name"` + + // Priority (higher number = higher priority) + Priority int `yaml:"priority"` + + // Conditions for applying this strategy + Conditions []RoutingCondition `yaml:"conditions,omitempty"` + + // Actions to take when conditions are met + Actions []RoutingAction `yaml:"actions"` +} + +// RoutingCondition represents a condition for routing strategy +type RoutingCondition struct { + // Condition type: "latency_requirement", "cost_sensitivity", "data_residency", "model_requirement", etc. + Type string `yaml:"type"` + + // Maximum latency in milliseconds (for latency_requirement) + MaxLatencyMs int `yaml:"max_latency_ms,omitempty"` + + // Maximum cost per 1k tokens (for cost_sensitivity) + MaxCostPer1kTokens float64 `yaml:"max_cost_per_1k_tokens,omitempty"` + + // Required region (for data_residency) + RequiredRegion string `yaml:"required_region,omitempty"` + + // Required model (for model_requirement) + RequiredModel string `yaml:"required_model,omitempty"` + + // Required compliance (for compliance_requirement) + RequiredCompliance []string `yaml:"required_compliance,omitempty"` +} + +// RoutingAction represents an action for routing strategy +type RoutingAction struct { + // Action type: "route_to_cluster", "route_to_provider", "load_balance", "failover" + Type string `yaml:"type"` + + // Target cluster/provider name + Target string `yaml:"target,omitempty"` + + // Load balancing strategy: "round_robin", "least_connections", "weighted" + LoadBalanceStrategy string `yaml:"load_balance_strategy,omitempty"` + + // Failover targets (ordered list) + FailoverTargets []string `yaml:"failover_targets,omitempty"` +} + +// FaultToleranceConfig represents fault tolerance configuration +type FaultToleranceConfig struct { + // Circuit breaker configuration + CircuitBreaker CircuitBreakerConfig `yaml:"circuit_breaker,omitempty"` + + // Retry policy + RetryPolicy RetryPolicyConfig `yaml:"retry_policy,omitempty"` + + // Fallback strategy: "next_best_cluster", "default_cluster", "error" + FallbackStrategy string `yaml:"fallback_strategy,omitempty"` + + // Default fallback cluster + DefaultFallbackCluster string `yaml:"default_fallback_cluster,omitempty"` +} + +// CircuitBreakerConfig represents circuit breaker configuration +type CircuitBreakerConfig struct { + // Failure threshold to open circuit + FailureThreshold int `yaml:"failure_threshold,omitempty"` + + // Timeout before attempting to close circuit + Timeout string 
`yaml:"timeout,omitempty"` + + // Maximum number of requests in half-open state + MaxRequests int `yaml:"max_requests,omitempty"` +} + +// RetryPolicyConfig represents retry policy configuration +type RetryPolicyConfig struct { + // Maximum number of retries + MaxRetries int `yaml:"max_retries,omitempty"` + + // Backoff multiplier + BackoffMultiplier float64 `yaml:"backoff_multiplier,omitempty"` + + // Maximum backoff duration + MaxBackoff string `yaml:"max_backoff,omitempty"` + + // Retry on specific error codes + RetryOnErrors []string `yaml:"retry_on_errors,omitempty"` +} + +// HealthCheckConfig represents health check configuration +type HealthCheckConfig struct { + // Health check path/endpoint + Path string `yaml:"path,omitempty"` + + // Check interval + Interval string `yaml:"interval,omitempty"` + + // Timeout for health check + Timeout string `yaml:"timeout,omitempty"` + + // Unhealthy threshold + UnhealthyThreshold int `yaml:"unhealthy_threshold,omitempty"` + + // Healthy threshold + HealthyThreshold int `yaml:"healthy_threshold,omitempty"` +} + // GetCacheSimilarityThreshold returns the effective threshold for the semantic cache func (c *RouterConfig) GetCacheSimilarityThreshold() float32 { if c.SemanticCache.SimilarityThreshold != nil { @@ -662,3 +931,100 @@ func (c *RouterConfig) ValidateEndpoints() error { return nil } + +// IsInterClusterRoutingEnabled returns true if inter-cluster routing is enabled +func (c *RouterConfig) IsInterClusterRoutingEnabled() bool { + return c.InterClusterRouting.Enabled +} + +// GetClusterByName returns a cluster configuration by name +func (c *RouterConfig) GetClusterByName(name string) (*ClusterConfig, bool) { + if !c.IsInterClusterRoutingEnabled() { + return nil, false + } + + for _, cluster := range c.InterClusterRouting.ClusterDiscovery.StaticClusters { + if cluster.Name == name { + return &cluster, true + } + } + return nil, false +} + +// GetProviderByName returns a provider configuration by name +func (c *RouterConfig) GetProviderByName(name string) (*ProviderConfig, bool) { + if !c.IsInterClusterRoutingEnabled() { + return nil, false + } + + for _, provider := range c.InterClusterRouting.Providers { + if provider.Name == name { + return &provider, true + } + } + return nil, false +} + +// GetRoutingStrategiesByPriority returns routing strategies sorted by priority (descending) +func (c *RouterConfig) GetRoutingStrategiesByPriority() []RoutingStrategy { + if !c.IsInterClusterRoutingEnabled() { + return nil + } + + strategies := make([]RoutingStrategy, len(c.InterClusterRouting.RoutingStrategies)) + copy(strategies, c.InterClusterRouting.RoutingStrategies) + + // Sort by priority (descending - higher priority first) + slices.SortFunc(strategies, func(a, b RoutingStrategy) int { + return b.Priority - a.Priority + }) + + return strategies +} + +// GetAllClustersAndProviders returns all available clusters and providers for routing +func (c *RouterConfig) GetAllClustersAndProviders() ([]ClusterConfig, []ProviderConfig) { + if !c.IsInterClusterRoutingEnabled() { + return nil, nil + } + + clusters := make([]ClusterConfig, len(c.InterClusterRouting.ClusterDiscovery.StaticClusters)) + copy(clusters, c.InterClusterRouting.ClusterDiscovery.StaticClusters) + + providers := make([]ProviderConfig, len(c.InterClusterRouting.Providers)) + copy(providers, c.InterClusterRouting.Providers) + + return clusters, providers +} + +// FindClustersForModel returns all clusters/providers that can serve the specified model +func (c *RouterConfig) 
FindClustersForModel(modelName string) ([]ClusterConfig, []ProviderConfig) { + if !c.IsInterClusterRoutingEnabled() { + return nil, nil + } + + var matchingClusters []ClusterConfig + var matchingProviders []ProviderConfig + + // Check clusters + for _, cluster := range c.InterClusterRouting.ClusterDiscovery.StaticClusters { + for _, model := range cluster.Models { + if model == modelName { + matchingClusters = append(matchingClusters, cluster) + break + } + } + } + + // Check providers + for _, provider := range c.InterClusterRouting.Providers { + for _, model := range provider.Models { + if model == modelName { + matchingProviders = append(matchingProviders, provider) + break + } + } + } + + return matchingClusters, matchingProviders +} diff --git a/src/semantic-router/pkg/extproc/request_handler.go b/src/semantic-router/pkg/extproc/request_handler.go index 867333de..eccbafc1 100644 --- a/src/semantic-router/pkg/extproc/request_handler.go +++ b/src/semantic-router/pkg/extproc/request_handler.go @@ -12,6 +12,7 @@ import ( "google.golang.org/grpc/status" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/intercluster" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/http" @@ -389,13 +390,48 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe // Update the actual model that will be used actualModel = matchedModel - // Select the best endpoint for this model - endpointAddress, endpointFound := r.Config.SelectBestEndpointAddressForModel(matchedModel) - if endpointFound { - selectedEndpoint = endpointAddress - observability.Infof("Selected endpoint address: %s for model: %s", selectedEndpoint, matchedModel) - } else { - observability.Warnf("No endpoint found for model %s, using fallback", matchedModel) + // Try inter-cluster routing first if enabled + if r.Config.IsInterClusterRoutingEnabled() { + routingCtx := &intercluster.RoutingContext{ + ModelName: matchedModel, + Category: categoryName, + UserContent: userContent, + } + + // Try to route using inter-cluster router + if routingResult, err := r.InterClusterRouter.RouteRequest(routingCtx); err == nil { + selectedEndpoint = routingResult.TargetEndpoint + observability.Infof("Inter-cluster routing selected %s (%s) for model: %s (reason: %s)", + routingResult.TargetName, routingResult.TargetType, matchedModel, routingResult.ReasonCode) + + // Log additional metrics for inter-cluster routing + observability.LogEvent("inter_cluster_routing", map[string]interface{}{ + "target_type": routingResult.TargetType, + "target_name": routingResult.TargetName, + "target_endpoint": routingResult.TargetEndpoint, + "reason_code": routingResult.ReasonCode, + "confidence": routingResult.Confidence, + "estimated_cost": routingResult.EstimatedCost, + "estimated_latency": routingResult.EstimatedLatency, + "model": matchedModel, + "category": categoryName, + }) + metrics.RecordRoutingReasonCode(routingResult.ReasonCode, matchedModel) + } else { + observability.Warnf("Inter-cluster routing failed for model %s: %v, falling back to local routing", matchedModel, err) + } + } + + // Fall back to local endpoint selection if inter-cluster routing didn't work + if selectedEndpoint == "" { + // Select the best endpoint for this model + endpointAddress, endpointFound := 
r.Config.SelectBestEndpointAddressForModel(matchedModel) + if endpointFound { + selectedEndpoint = endpointAddress + observability.Infof("Selected local endpoint address: %s for model: %s", selectedEndpoint, matchedModel) + } else { + observability.Warnf("No local endpoint found for model %s, using fallback", matchedModel) + } } // Modify the model in the request @@ -505,12 +541,47 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe } // Select the best endpoint for the specified model - endpointAddress, endpointFound := r.Config.SelectBestEndpointAddressForModel(originalModel) - if endpointFound { - selectedEndpoint = endpointAddress - observability.Infof("Selected endpoint address: %s for model: %s", selectedEndpoint, originalModel) - } else { - observability.Warnf("No endpoint found for model %s, using fallback", originalModel) + // Try inter-cluster routing first if enabled + if r.Config.IsInterClusterRoutingEnabled() { + routingCtx := &intercluster.RoutingContext{ + ModelName: originalModel, + Category: "", // No category for non-auto routing + UserContent: userContent, + } + + // Try to route using inter-cluster router + if routingResult, err := r.InterClusterRouter.RouteRequest(routingCtx); err == nil { + selectedEndpoint = routingResult.TargetEndpoint + observability.Infof("Inter-cluster routing selected %s (%s) for specified model: %s (reason: %s)", + routingResult.TargetName, routingResult.TargetType, originalModel, routingResult.ReasonCode) + + // Log additional metrics for inter-cluster routing + observability.LogEvent("inter_cluster_routing", map[string]interface{}{ + "target_type": routingResult.TargetType, + "target_name": routingResult.TargetName, + "target_endpoint": routingResult.TargetEndpoint, + "reason_code": routingResult.ReasonCode, + "confidence": routingResult.Confidence, + "estimated_cost": routingResult.EstimatedCost, + "estimated_latency": routingResult.EstimatedLatency, + "model": originalModel, + "category": "", + }) + metrics.RecordRoutingReasonCode(routingResult.ReasonCode, originalModel) + } else { + observability.Warnf("Inter-cluster routing failed for model %s: %v, falling back to local routing", originalModel, err) + } + } + + // Fall back to local endpoint selection if inter-cluster routing didn't work + if selectedEndpoint == "" { + endpointAddress, endpointFound := r.Config.SelectBestEndpointAddressForModel(originalModel) + if endpointFound { + selectedEndpoint = endpointAddress + observability.Infof("Selected local endpoint address: %s for model: %s", selectedEndpoint, originalModel) + } else { + observability.Warnf("No local endpoint found for model %s, using fallback", originalModel) + } } setHeaders := []*core.HeaderValueOption{} if selectedEndpoint != "" { diff --git a/src/semantic-router/pkg/extproc/router.go b/src/semantic-router/pkg/extproc/router.go index 90eed7c5..3a5fb879 100644 --- a/src/semantic-router/pkg/extproc/router.go +++ b/src/semantic-router/pkg/extproc/router.go @@ -9,6 +9,7 @@ import ( "github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/intercluster" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/services" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/tools" @@ -24,6 +25,7 @@ type OpenAIRouter struct { PIIChecker 
*pii.PolicyChecker Cache cache.CacheBackend ToolsDatabase *tools.ToolsDatabase + InterClusterRouter *intercluster.InterClusterRouter } // Ensure OpenAIRouter implements the ext_proc calls @@ -138,6 +140,14 @@ func NewOpenAIRouter(configPath string) (*OpenAIRouter, error) { return nil, fmt.Errorf("failed to create classifier: %w", err) } + // Create inter-cluster router + interClusterRouter := intercluster.NewInterClusterRouter(cfg) + if cfg.IsInterClusterRoutingEnabled() { + observability.Infof("Inter-cluster routing enabled with %d clusters and %d providers", + len(cfg.InterClusterRouting.ClusterDiscovery.StaticClusters), + len(cfg.InterClusterRouting.Providers)) + } + // Create global classification service for API access with auto-discovery // This will prioritize LoRA models over legacy ModernBERT autoSvc, err := services.NewClassificationServiceWithAutoDiscovery(cfg) @@ -157,6 +167,7 @@ func NewOpenAIRouter(configPath string) (*OpenAIRouter, error) { PIIChecker: piiChecker, Cache: semanticCache, ToolsDatabase: toolsDatabase, + InterClusterRouter: interClusterRouter, } // Log reasoning configuration after router is created diff --git a/src/semantic-router/pkg/intercluster/router.go b/src/semantic-router/pkg/intercluster/router.go new file mode 100644 index 00000000..ab41b25c --- /dev/null +++ b/src/semantic-router/pkg/intercluster/router.go @@ -0,0 +1,351 @@ +package intercluster + +import ( + "fmt" + "time" + + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" +) + +// InterClusterRouter handles routing decisions across multiple clusters and providers +type InterClusterRouter struct { + config *config.RouterConfig +} + +// NewInterClusterRouter creates a new inter-cluster router +func NewInterClusterRouter(cfg *config.RouterConfig) *InterClusterRouter { + return &InterClusterRouter{ + config: cfg, + } +} + +// RoutingContext contains information for routing decisions +type RoutingContext struct { + ModelName string + Category string + UserContent string + LatencyRequirement *int // Max latency in milliseconds + CostSensitivity *float64 // Max cost per 1k tokens + ComplianceRequirements []string // Required compliance standards + DataResidency string // Required region +} + +// RoutingResult contains the result of a routing decision +type RoutingResult struct { + TargetType string // "cluster" or "provider" + TargetName string + TargetEndpoint string + ReasonCode string + Confidence float64 + EstimatedCost float64 + EstimatedLatency int +} + +// RouteRequest performs inter-cluster routing based on context and strategies +func (r *InterClusterRouter) RouteRequest(ctx *RoutingContext) (*RoutingResult, error) { + if !r.config.IsInterClusterRoutingEnabled() { + observability.Infof("Inter-cluster routing disabled, falling back to local routing") + return nil, fmt.Errorf("inter-cluster routing is not enabled") + } + + // Get routing strategies by priority + strategies := r.config.GetRoutingStrategiesByPriority() + if len(strategies) == 0 { + observability.Warnf("No routing strategies configured, using default strategy") + return r.defaultRouting(ctx) + } + + // Apply strategies in priority order + for _, strategy := range strategies { + if r.evaluateConditions(strategy.Conditions, ctx) { + result, err := r.executeActions(strategy.Actions, ctx) + if err == nil && result != nil { + result.ReasonCode = fmt.Sprintf("strategy_%s", strategy.Name) + observability.Infof("Applied routing strategy 
'%s' for model '%s'", strategy.Name, ctx.ModelName) + return result, nil + } + observability.Warnf("Strategy '%s' failed: %v", strategy.Name, err) + } + } + + // Fall back to default routing + observability.Infof("No strategies matched, using default routing for model '%s'", ctx.ModelName) + return r.defaultRouting(ctx) +} + +// evaluateConditions checks if all conditions in a strategy are met +func (r *InterClusterRouter) evaluateConditions(conditions []config.RoutingCondition, ctx *RoutingContext) bool { + if len(conditions) == 0 { + return true // No conditions means always apply + } + + for _, condition := range conditions { + if !r.evaluateCondition(condition, ctx) { + return false + } + } + return true +} + +// evaluateCondition checks if a single condition is met +func (r *InterClusterRouter) evaluateCondition(condition config.RoutingCondition, ctx *RoutingContext) bool { + switch condition.Type { + case "latency_requirement": + if ctx.LatencyRequirement != nil && condition.MaxLatencyMs > 0 { + return *ctx.LatencyRequirement <= condition.MaxLatencyMs + } + return false // If no latency requirement in context, this condition should not match + + case "cost_sensitivity": + if ctx.CostSensitivity != nil && condition.MaxCostPer1kTokens > 0 { + return *ctx.CostSensitivity <= condition.MaxCostPer1kTokens + } + return false // If no cost sensitivity in context, this condition should not match + + case "data_residency": + if condition.RequiredRegion != "" && ctx.DataResidency != "" { + return ctx.DataResidency == condition.RequiredRegion + } + return false // If no data residency requirement, this condition should not match + + case "model_requirement": + if condition.RequiredModel != "" { + return ctx.ModelName == condition.RequiredModel + } + return true + + case "compliance_requirement": + if len(condition.RequiredCompliance) > 0 && len(ctx.ComplianceRequirements) > 0 { + return r.hasRequiredCompliance(condition.RequiredCompliance, ctx.ComplianceRequirements) + } + return false // If no compliance requirements in context, this condition should not match + + default: + observability.Warnf("Unknown condition type: %s", condition.Type) + return false + } +} + +// hasRequiredCompliance checks if all required compliance standards are met +func (r *InterClusterRouter) hasRequiredCompliance(required []string, available []string) bool { + for _, req := range required { + found := false + for _, avail := range available { + if req == avail { + found = true + break + } + } + if !found { + return false + } + } + return true +} + +// executeActions executes the actions for a matched strategy +func (r *InterClusterRouter) executeActions(actions []config.RoutingAction, ctx *RoutingContext) (*RoutingResult, error) { + for _, action := range actions { + switch action.Type { + case "route_to_cluster": + return r.routeToCluster(action.Target, ctx) + case "route_to_provider": + return r.routeToProvider(action.Target, ctx) + case "load_balance": + return r.loadBalanceRoute(action, ctx) + case "failover": + return r.failoverRoute(action, ctx) + default: + observability.Warnf("Unknown action type: %s", action.Type) + } + } + return nil, fmt.Errorf("no executable actions found") +} + +// routeToCluster routes to a specific cluster +func (r *InterClusterRouter) routeToCluster(clusterName string, ctx *RoutingContext) (*RoutingResult, error) { + cluster, found := r.config.GetClusterByName(clusterName) + if !found { + return nil, fmt.Errorf("cluster '%s' not found", clusterName) + } + + // Check if cluster 
supports the model + if !r.clusterSupportsModel(cluster, ctx.ModelName) { + return nil, fmt.Errorf("cluster '%s' does not support model '%s'", clusterName, ctx.ModelName) + } + + result := &RoutingResult{ + TargetType: "cluster", + TargetName: clusterName, + TargetEndpoint: cluster.Endpoint, + Confidence: 1.0, + EstimatedCost: cluster.CostPerToken * 1000, // Convert to per 1k tokens + EstimatedLatency: cluster.Performance.AvgLatencyMs, + } + + observability.Infof("Routed to cluster '%s' for model '%s'", clusterName, ctx.ModelName) + return result, nil +} + +// routeToProvider routes to a specific provider +func (r *InterClusterRouter) routeToProvider(providerName string, ctx *RoutingContext) (*RoutingResult, error) { + provider, found := r.config.GetProviderByName(providerName) + if !found { + return nil, fmt.Errorf("provider '%s' not found", providerName) + } + + // Check if provider supports the model + if !r.providerSupportsModel(provider, ctx.ModelName) { + return nil, fmt.Errorf("provider '%s' does not support model '%s'", providerName, ctx.ModelName) + } + + result := &RoutingResult{ + TargetType: "provider", + TargetName: providerName, + TargetEndpoint: provider.Endpoint, + Confidence: 1.0, + EstimatedLatency: provider.Performance.AvgLatencyMs, + } + + observability.Infof("Routed to provider '%s' for model '%s'", providerName, ctx.ModelName) + return result, nil +} + +// loadBalanceRoute implements load balancing across multiple targets +func (r *InterClusterRouter) loadBalanceRoute(action config.RoutingAction, ctx *RoutingContext) (*RoutingResult, error) { + clusters, providers := r.config.FindClustersForModel(ctx.ModelName) + + if len(clusters) == 0 && len(providers) == 0 { + return nil, fmt.Errorf("no clusters or providers found for model '%s'", ctx.ModelName) + } + + // Simple round-robin load balancing based on timestamp + timestamp := time.Now().UnixNano() + totalTargets := len(clusters) + len(providers) + selectedIndex := int(timestamp) % totalTargets + + if selectedIndex < len(clusters) { + // Route to cluster + cluster := clusters[selectedIndex] + return r.routeToCluster(cluster.Name, ctx) + } else { + // Route to provider + provider := providers[selectedIndex-len(clusters)] + return r.routeToProvider(provider.Name, ctx) + } +} + +// failoverRoute implements failover routing +func (r *InterClusterRouter) failoverRoute(action config.RoutingAction, ctx *RoutingContext) (*RoutingResult, error) { + for _, target := range action.FailoverTargets { + // Try cluster first + if result, err := r.routeToCluster(target, ctx); err == nil { + result.ReasonCode = "failover_cluster" + return result, nil + } + + // Try provider + if result, err := r.routeToProvider(target, ctx); err == nil { + result.ReasonCode = "failover_provider" + return result, nil + } + + observability.Warnf("Failover target '%s' failed for model '%s'", target, ctx.ModelName) + } + + return nil, fmt.Errorf("all failover targets failed for model '%s'", ctx.ModelName) +} + +// defaultRouting implements default routing strategy (latency-optimized) +func (r *InterClusterRouter) defaultRouting(ctx *RoutingContext) (*RoutingResult, error) { + clusters, providers := r.config.FindClustersForModel(ctx.ModelName) + + if len(clusters) == 0 && len(providers) == 0 { + return nil, fmt.Errorf("no clusters or providers found for model '%s'", ctx.ModelName) + } + + var bestResult *RoutingResult + bestLatency := int(^uint(0) >> 1) // Max int + + // Check clusters + for _, cluster := range clusters { + if 
cluster.Performance.AvgLatencyMs > 0 && cluster.Performance.AvgLatencyMs < bestLatency { + bestLatency = cluster.Performance.AvgLatencyMs + bestResult = &RoutingResult{ + TargetType: "cluster", + TargetName: cluster.Name, + TargetEndpoint: cluster.Endpoint, + ReasonCode: "default_latency_optimized", + Confidence: 0.8, + EstimatedCost: cluster.CostPerToken * 1000, + EstimatedLatency: cluster.Performance.AvgLatencyMs, + } + } + } + + // Check providers + for _, provider := range providers { + if provider.Performance.AvgLatencyMs > 0 && provider.Performance.AvgLatencyMs < bestLatency { + bestLatency = provider.Performance.AvgLatencyMs + bestResult = &RoutingResult{ + TargetType: "provider", + TargetName: provider.Name, + TargetEndpoint: provider.Endpoint, + ReasonCode: "default_latency_optimized", + Confidence: 0.8, + EstimatedLatency: provider.Performance.AvgLatencyMs, + } + } + } + + if bestResult == nil { + // Fallback: choose first available + if len(clusters) > 0 { + cluster := clusters[0] + bestResult = &RoutingResult{ + TargetType: "cluster", + TargetName: cluster.Name, + TargetEndpoint: cluster.Endpoint, + ReasonCode: "default_first_available", + Confidence: 0.6, + EstimatedCost: cluster.CostPerToken * 1000, + EstimatedLatency: cluster.Performance.AvgLatencyMs, + } + } else { + provider := providers[0] + bestResult = &RoutingResult{ + TargetType: "provider", + TargetName: provider.Name, + TargetEndpoint: provider.Endpoint, + ReasonCode: "default_first_available", + Confidence: 0.6, + EstimatedLatency: provider.Performance.AvgLatencyMs, + } + } + } + + observability.Infof("Default routing selected '%s' for model '%s' with latency %dms", + bestResult.TargetName, ctx.ModelName, bestResult.EstimatedLatency) + return bestResult, nil +} + +// clusterSupportsModel checks if a cluster supports a specific model +func (r *InterClusterRouter) clusterSupportsModel(cluster *config.ClusterConfig, modelName string) bool { + for _, model := range cluster.Models { + if model == modelName { + return true + } + } + return false +} + +// providerSupportsModel checks if a provider supports a specific model +func (r *InterClusterRouter) providerSupportsModel(provider *config.ProviderConfig, modelName string) bool { + for _, model := range provider.Models { + if model == modelName { + return true + } + } + return false +} \ No newline at end of file diff --git a/src/semantic-router/pkg/intercluster/router_test.go b/src/semantic-router/pkg/intercluster/router_test.go new file mode 100644 index 00000000..3b4fdbea --- /dev/null +++ b/src/semantic-router/pkg/intercluster/router_test.go @@ -0,0 +1,293 @@ +package intercluster + +import ( + "testing" + + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" +) + +func TestInterClusterRouting(t *testing.T) { + // Create test configuration with inter-cluster routing enabled + cfg := &config.RouterConfig{ + InterClusterRouting: config.InterClusterConfig{ + Enabled: true, + ClusterDiscovery: config.ClusterDiscoveryConfig{ + Method: "static", + StaticClusters: []config.ClusterConfig{ + { + Name: "test-cluster-1", + Location: "us-west-2", + Type: "vllm", + Endpoint: "https://cluster1.example.com:8000", + Models: []string{"llama-2-70b", "mistral-7b"}, + Performance: config.PerformanceMetrics{ + AvgLatencyMs: 150, + }, + CostPerToken: 0.001, + }, + { + Name: "test-cluster-2", + Location: "eu-west-1", + Type: "vllm", + Endpoint: "https://cluster2.example.com:8000", + Models: []string{"llama-2-70b", "gpt-4"}, + Performance: config.PerformanceMetrics{ + 
AvgLatencyMs: 200, + }, + CostPerToken: 0.002, + Compliance: []string{"gdpr"}, + }, + }, + }, + Providers: []config.ProviderConfig{ + { + Name: "openai-provider", + Type: "openai", + Endpoint: "https://api.openai.com/v1", + Models: []string{"gpt-4", "gpt-3.5-turbo"}, + Performance: config.PerformanceMetrics{ + AvgLatencyMs: 300, + }, + }, + }, + RoutingStrategies: []config.RoutingStrategy{ + { + Name: "latency-optimized", + Priority: 100, + Conditions: []config.RoutingCondition{ + { + Type: "latency_requirement", + MaxLatencyMs: 200, + }, + }, + Actions: []config.RoutingAction{ + { + Type: "route_to_cluster", + Target: "test-cluster-1", + }, + }, + }, + { + Name: "compliance-routing", + Priority: 200, + Conditions: []config.RoutingCondition{ + { + Type: "compliance_requirement", + RequiredCompliance: []string{"gdpr"}, + }, + }, + Actions: []config.RoutingAction{ + { + Type: "route_to_cluster", + Target: "test-cluster-2", + }, + }, + }, + }, + }, + } + + router := NewInterClusterRouter(cfg) + + t.Run("TestLatencyBasedRouting", func(t *testing.T) { + ctx := &RoutingContext{ + ModelName: "llama-2-70b", + Category: "general", + UserContent: "Test query", + LatencyRequirement: intPtr(200), + } + + result, err := router.RouteRequest(ctx) + if err != nil { + t.Fatalf("Expected successful routing, got error: %v", err) + } + + if result.TargetName != "test-cluster-1" { + t.Errorf("Expected cluster 'test-cluster-1', got '%s'", result.TargetName) + } + + if result.TargetType != "cluster" { + t.Errorf("Expected target type 'cluster', got '%s'", result.TargetType) + } + + if result.ReasonCode != "strategy_latency-optimized" { + t.Errorf("Expected reason code 'strategy_latency-optimized', got '%s'", result.ReasonCode) + } + }) + + t.Run("TestComplianceBasedRouting", func(t *testing.T) { + ctx := &RoutingContext{ + ModelName: "llama-2-70b", + Category: "general", + UserContent: "Test query", + ComplianceRequirements: []string{"gdpr"}, + } + + result, err := router.RouteRequest(ctx) + if err != nil { + t.Fatalf("Expected successful routing, got error: %v", err) + } + + if result.TargetName != "test-cluster-2" { + t.Errorf("Expected cluster 'test-cluster-2', got '%s'", result.TargetName) + } + + if result.ReasonCode != "strategy_compliance-routing" { + t.Errorf("Expected reason code 'strategy_compliance-routing', got '%s'", result.ReasonCode) + } + }) + + t.Run("TestDefaultRouting", func(t *testing.T) { + ctx := &RoutingContext{ + ModelName: "mistral-7b", + Category: "general", + UserContent: "Test query", + } + + result, err := router.RouteRequest(ctx) + if err != nil { + t.Fatalf("Expected successful routing, got error: %v", err) + } + + // Should route to cluster with better latency (test-cluster-1) + if result.TargetName != "test-cluster-1" { + t.Errorf("Expected cluster 'test-cluster-1' for default routing, got '%s'", result.TargetName) + } + + if result.ReasonCode != "default_latency_optimized" { + t.Errorf("Expected reason code 'default_latency_optimized', got '%s'", result.ReasonCode) + } + }) + + t.Run("TestProviderRouting", func(t *testing.T) { + ctx := &RoutingContext{ + ModelName: "gpt-3.5-turbo", + Category: "general", + UserContent: "Test query", + } + + result, err := router.RouteRequest(ctx) + if err != nil { + t.Fatalf("Expected successful routing, got error: %v", err) + } + + // Should route to either cluster or provider that supports gpt-3.5-turbo + // Since only openai-provider supports gpt-3.5-turbo, it should route there + if result.TargetName != "openai-provider" && result.TargetName 
!= "test-cluster-2" { + t.Errorf("Expected 'openai-provider' or 'test-cluster-2', got '%s'", result.TargetName) + } + }) + + t.Run("TestNoSupportingCluster", func(t *testing.T) { + ctx := &RoutingContext{ + ModelName: "unsupported-model", + Category: "general", + UserContent: "Test query", + } + + _, err := router.RouteRequest(ctx) + if err == nil { + t.Fatalf("Expected error for unsupported model, got success") + } + }) + + t.Run("TestDisabledInterClusterRouting", func(t *testing.T) { + disabledCfg := &config.RouterConfig{ + InterClusterRouting: config.InterClusterConfig{ + Enabled: false, + }, + } + + disabledRouter := NewInterClusterRouter(disabledCfg) + + ctx := &RoutingContext{ + ModelName: "llama-2-70b", + Category: "general", + UserContent: "Test query", + } + + _, err := disabledRouter.RouteRequest(ctx) + if err == nil { + t.Fatalf("Expected error when inter-cluster routing is disabled, got success") + } + }) +} + +func TestConditionEvaluation(t *testing.T) { + cfg := &config.RouterConfig{ + InterClusterRouting: config.InterClusterConfig{ + Enabled: true, + }, + } + + router := NewInterClusterRouter(cfg) + + t.Run("TestLatencyCondition", func(t *testing.T) { + condition := config.RoutingCondition{ + Type: "latency_requirement", + MaxLatencyMs: 200, + } + + ctx := &RoutingContext{ + LatencyRequirement: intPtr(150), + } + + if !router.evaluateCondition(condition, ctx) { + t.Errorf("Expected latency condition to pass") + } + + ctx.LatencyRequirement = intPtr(250) + if router.evaluateCondition(condition, ctx) { + t.Errorf("Expected latency condition to fail") + } + }) + + t.Run("TestCostCondition", func(t *testing.T) { + condition := config.RoutingCondition{ + Type: "cost_sensitivity", + MaxCostPer1kTokens: 0.0015, + } + + ctx := &RoutingContext{ + CostSensitivity: float64Ptr(0.001), + } + + if !router.evaluateCondition(condition, ctx) { + t.Errorf("Expected cost condition to pass") + } + + ctx.CostSensitivity = float64Ptr(0.002) + if router.evaluateCondition(condition, ctx) { + t.Errorf("Expected cost condition to fail") + } + }) + + t.Run("TestComplianceCondition", func(t *testing.T) { + condition := config.RoutingCondition{ + Type: "compliance_requirement", + RequiredCompliance: []string{"gdpr", "sox"}, + } + + ctx := &RoutingContext{ + ComplianceRequirements: []string{"gdpr", "sox", "hipaa"}, + } + + if !router.evaluateCondition(condition, ctx) { + t.Errorf("Expected compliance condition to pass") + } + + ctx.ComplianceRequirements = []string{"gdpr"} + if router.evaluateCondition(condition, ctx) { + t.Errorf("Expected compliance condition to fail (missing sox)") + } + }) +} + +// Helper functions for test pointers +func intPtr(i int) *int { + return &i +} + +func float64Ptr(f float64) *float64 { + return &f +} \ No newline at end of file diff --git a/website/docs/getting-started/multi-cloud-routing.md b/website/docs/getting-started/multi-cloud-routing.md new file mode 100644 index 00000000..0c87ea2d --- /dev/null +++ b/website/docs/getting-started/multi-cloud-routing.md @@ -0,0 +1,205 @@ +# Multi-Cloud and Hybrid Cloud Routing + +The Semantic Router now supports **inter-cluster and multi-cloud routing**, enabling sophisticated routing across multiple clusters, cloud providers, and deployment environments. This feature extends the existing semantic classification and routing capabilities to work seamlessly across distributed infrastructure. 
+ +## Overview + +Multi-cloud routing allows you to: + +- **Route across multiple clusters**: Distribute load across vLLM clusters in different regions or environments +- **Integrate cloud providers**: Route to OpenAI, Anthropic Claude, Grok, and other API providers +- **Optimize for performance**: Route based on latency, throughput, and availability +- **Control costs**: Route to the most cost-effective clusters and providers +- **Ensure compliance**: Route based on data residency and regulatory requirements +- **Provide fault tolerance**: Automatic failover and circuit breaker patterns + +## Configuration + +### Enabling Inter-Cluster Routing + +Add the `inter_cluster_routing` section to your configuration: + +```yaml +inter_cluster_routing: + enabled: true + + cluster_discovery: + method: "static" # Options: "static", "kubernetes", "consul", "etcd" + refresh_interval: "30s" + health_check_interval: "10s" +``` + +### Cluster Configuration + +Define your clusters in the `static_clusters` section: + +```yaml +inter_cluster_routing: + cluster_discovery: + static_clusters: + - name: "on-prem-gpu-cluster" + location: "us-west-2" + type: "vllm" + endpoint: "https://on-prem.company.com:8000" + authentication: + type: "bearer" + token: "bearer-token-secret" + models: + - "llama-2-70b" + - "codellama-34b" + capabilities: + max_context_length: 4096 + max_tokens_per_second: 100 + performance: + avg_latency_ms: 150 + throughput_rps: 50 + availability: 99.5 + compliance: + - "hipaa" + - "sox" + cost_per_token: 0.001 +``` + +For more detailed examples and configuration options, see the [complete multi-cloud configuration example](https://github.com/vllm-project/semantic-router/blob/main/config/multi-cloud-config-example.yaml). + +## Routing Strategies + +Configure sophisticated routing logic with prioritized strategies. Strategies are evaluated in priority order (higher number = higher priority). + +### Example Strategy Configuration + +```yaml +routing_strategies: + # Highest Priority: Compliance-based routing + - name: "gdpr-compliance-routing" + priority: 300 + conditions: + - type: "compliance_requirement" + required_compliance: ["gdpr"] + actions: + - type: "route_to_cluster" + target: "eu-west-cluster" + + # Medium Priority: Latency-optimized routing + - name: "latency-optimized-routing" + priority: 200 + conditions: + - type: "latency_requirement" + max_latency_ms: 200 + actions: + - type: "route_to_cluster" + target: "edge-cluster" + - type: "failover" + failover_targets: ["backup-cluster"] +``` + +## Supported Providers + +The system supports routing to multiple types of providers: + +- **vLLM Clusters**: Self-hosted vLLM deployments +- **OpenAI API**: GPT-4, GPT-3.5-turbo, and other OpenAI models +- **Anthropic Claude**: Claude-3 and related models +- **Grok API**: xAI's Grok models +- **Custom Providers**: Extensible plugin architecture + +## Use Cases + +### 1. On-Premises + Cloud Hybrid + +Route sensitive data to on-premises clusters while using cloud providers for general queries: + +```yaml +routing_strategies: + - name: "sensitive-data-routing" + priority: 300 + conditions: + - type: "compliance_requirement" + required_compliance: ["hipaa"] + actions: + - type: "route_to_cluster" + target: "on-prem-secure-cluster" +``` + +### 2. 
Multi-Region GDPR Compliance + +Ensure EU data stays in EU clusters: + +```yaml +routing_strategies: + - name: "eu-data-residency" + priority: 300 + conditions: + - type: "data_residency" + required_region: "eu-west-1" + actions: + - type: "route_to_cluster" + target: "eu-west-cluster" +``` + +### 3. Cost Optimization + +Route to the most cost-effective clusters: + +```yaml +routing_strategies: + - name: "cost-optimization" + priority: 150 + conditions: + - type: "cost_sensitivity" + max_cost_per_1k_tokens: 0.001 + actions: + - type: "route_to_cluster" + target: "cost-effective-cluster" +``` + +## Key Features + +### ✅ **Condition-Based Routing** +- Latency requirements +- Cost sensitivity +- Compliance needs +- Data residency +- Model-specific routing + +### ✅ **Multiple Action Types** +- Direct cluster routing +- Provider routing +- Load balancing +- Failover strategies + +### ✅ **Fault Tolerance** +- Circuit breaker patterns +- Retry policies with backoff +- Automatic failover +- Health monitoring + +### ✅ **Authentication Support** +- Bearer tokens +- API keys +- OAuth (future) +- Per-cluster/provider auth + +### ✅ **Monitoring & Observability** +- Detailed routing logs +- Performance metrics +- Cost tracking +- Health status + +## Migration from Single-Cluster + +Existing configurations remain fully compatible! To add multi-cloud routing: + +1. **Enable the feature**: Add `inter_cluster_routing.enabled: true` +2. **Define clusters**: Move existing endpoints to `static_clusters` +3. **Add strategies**: Configure routing logic based on your needs +4. **Test gradually**: Start with simple strategies and expand + +## Getting Started + +1. **Review the [complete configuration example](https://github.com/vllm-project/semantic-router/blob/main/config/multi-cloud-config-example.yaml)** +2. **Read the [detailed configuration guide](../getting-started/configuration.md)** +3. **Check the [API reference](../api/router/) for programmatic access** +4. **See [troubleshooting tips](../categories/technical-details.md) for common issues** + +This powerful feature enables enterprise-grade routing across complex, distributed LLM infrastructure while maintaining the simplicity and intelligence of semantic classification. \ No newline at end of file
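+
+### Example: Migrating a Local Endpoint
+
+As a minimal sketch of the migration steps above (the endpoint address and cluster name here are illustrative, not prescriptive), an existing `vllm_endpoints` entry can be left in place and mirrored as a static cluster so both routing paths remain available during the transition:
+
+```yaml
+# Before: legacy single-cluster endpoint (continues to work unchanged)
+vllm_endpoints:
+  - name: "local-endpoint"
+    address: "127.0.0.1"
+    port: 8000
+    models:
+      - "llama-2-7b"
+
+# After: the same backend also registered for inter-cluster routing
+inter_cluster_routing:
+  enabled: true
+  cluster_discovery:
+    method: "static"
+    static_clusters:
+      - name: "local-cluster"          # illustrative cluster name
+        location: "on-prem"            # free-form location label
+        type: "vllm"
+        endpoint: "http://127.0.0.1:8000"
+        models:
+          - "llama-2-7b"
+```
+
+With no `routing_strategies` defined, the router applies its default selection over the clusters and providers that serve the requested model, so behaviour stays equivalent to the single-cluster setup until strategies are added.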