diff --git a/config/config-mcp-classifier-example.yaml b/config/config-mcp-classifier-example.yaml
index 8fc25e37..1aaca432 100644
--- a/config/config-mcp-classifier-example.yaml
+++ b/config/config-mcp-classifier-example.yaml
@@ -75,8 +75,6 @@ vllm_endpoints:
   - name: endpoint1
     address: 127.0.0.1
     port: 8000
-    models:
-      - openai/gpt-oss-20b
     weight: 1
     health_check_path: /health
diff --git a/config/config.development.yaml b/config/config.development.yaml
index 3bec3828..fa7afdef 100644
--- a/config/config.development.yaml
+++ b/config/config.development.yaml
@@ -29,8 +29,6 @@ vllm_endpoints:
   - name: "local-endpoint"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
 model_config:
diff --git a/config/config.e2e.yaml b/config/config.e2e.yaml
index 1e2e5689..d90da786 100644
--- a/config/config.e2e.yaml
+++ b/config/config.e2e.yaml
@@ -42,15 +42,11 @@ vllm_endpoints:
   - name: "qwen-endpoint"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "Model-A"
     weight: 1
     health_check_path: "/health"
   - name: "tinyllama-endpoint"
     address: "127.0.0.1"
     port: 8001
-    models:
-      - "Model-B"
     weight: 1
     health_check_path: "/health"
diff --git a/config/config.production.yaml b/config/config.production.yaml
index 07258956..edd049a3 100644
--- a/config/config.production.yaml
+++ b/config/config.production.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
 
 model_config:
diff --git a/config/config.recipe-accuracy.yaml b/config/config.recipe-accuracy.yaml
index 82769836..18f2751d 100644
--- a/config/config.recipe-accuracy.yaml
+++ b/config/config.recipe-accuracy.yaml
@@ -44,8 +44,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
 
 model_config:
diff --git a/config/config.recipe-latency.yaml b/config/config.recipe-latency.yaml
index 15008b04..00b3ae00 100644
--- a/config/config.recipe-latency.yaml
+++ b/config/config.recipe-latency.yaml
@@ -39,8 +39,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
 
 model_config:
diff --git a/config/config.recipe-token-efficiency.yaml b/config/config.recipe-token-efficiency.yaml
index be3d8abc..b76aeec4 100644
--- a/config/config.recipe-token-efficiency.yaml
+++ b/config/config.recipe-token-efficiency.yaml
@@ -44,8 +44,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
 
 model_config:
diff --git a/config/config.testing.yaml b/config/config.testing.yaml
index 461010eb..9dc59e5c 100644
--- a/config/config.testing.yaml
+++ b/config/config.testing.yaml
@@ -30,8 +30,6 @@ vllm_endpoints:
   - name: "mock"
     address: "172.28.0.10"
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
     health_check_path: "/health"
diff --git a/config/config.yaml b/config/config.yaml
index 9b814cdc..579b9e35 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1" # IPv4 address - REQUIRED format
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
 
 model_config:
diff --git a/config/examples/system_prompt_example.yaml b/config/examples/system_prompt_example.yaml
index d0cbfd3f..ff83cd91 100644
--- a/config/examples/system_prompt_example.yaml
+++ b/config/examples/system_prompt_example.yaml
@@ -100,9 +100,9 @@ default_reasoning_effort: medium
 # vLLM endpoints configuration
 vllm_endpoints:
   - name: "mock"
-    address: "http://127.0.0.1:8000"
-    models:
-      - "openai/gpt-oss-20b"
+    address: "127.0.0.1"
+    port: 8000
+    weight: 1
 
 # Usage Notes:
 # 1. System prompts are automatically injected based on query classification
diff --git a/dashboard/frontend/src/pages/ConfigPage.tsx b/dashboard/frontend/src/pages/ConfigPage.tsx
index 4e969525..4d5867be 100644
--- a/dashboard/frontend/src/pages/ConfigPage.tsx
+++ b/dashboard/frontend/src/pages/ConfigPage.tsx
@@ -7,7 +7,6 @@ interface VLLMEndpoint {
   name: string
   address: string
   port: number
-  models: string[]
  weight: number
  health_check_path: string
 }
diff --git a/deploy/kubernetes/config.yaml b/deploy/kubernetes/config.yaml
index cdb4eb0a..5bc40cbb 100644
--- a/deploy/kubernetes/config.yaml
+++ b/deploy/kubernetes/config.yaml
@@ -34,8 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1" # IPv4 address - REQUIRED format
     port: 8000
-    models:
-      - "openai/gpt-oss-20b"
     weight: 1
 
 model_config:
diff --git a/deploy/kubernetes/istio/config.yaml b/deploy/kubernetes/istio/config.yaml
index 8ce78ab3..e424ed9d 100644
--- a/deploy/kubernetes/istio/config.yaml
+++ b/deploy/kubernetes/istio/config.yaml
@@ -34,14 +34,10 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "10.104.192.205" # IPv4 address - REQUIRED format
     port: 80
-    models:
-      - "llama3-8b"
     weight: 1
   - name: "endpoint2"
     address: "10.99.27.202" # IPv4 address - REQUIRED format
     port: 80
-    models:
-      - "phi4-mini"
     weight: 1
 
 model_config:
diff --git a/deploy/openshift/config-openshift.yaml b/deploy/openshift/config-openshift.yaml
index 857a996a..9cd98925 100644
--- a/deploy/openshift/config-openshift.yaml
+++ b/deploy/openshift/config-openshift.yaml
@@ -32,14 +32,10 @@ vllm_endpoints:
   - name: "model-a-endpoint"
     address: "127.0.0.1" # localhost in same pod
     port: 8000
-    models:
-      - "Model-A"
     weight: 1
   - name: "model-b-endpoint"
     address: "127.0.0.1" # localhost in same pod
     port: 8001
-    models:
-      - "Model-B"
     weight: 1
 
 model_config:
diff --git a/src/semantic-router/pkg/api/server_test.go b/src/semantic-router/pkg/api/server_test.go
index aaf4e005..ecdae7b4 100644
--- a/src/semantic-router/pkg/api/server_test.go
+++ b/src/semantic-router/pkg/api/server_test.go
@@ -309,10 +309,17 @@ func TestOpenAIModelsEndpoint(t *testing.T) {
 				Name:    "primary",
 				Address: "127.0.0.1", // Changed from localhost to IP address
 				Port:    8000,
-				Models:  []string{"gpt-4o-mini", "llama-3.1-8b-instruct"},
 				Weight:  1,
 			},
 		},
+		ModelConfig: map[string]config.ModelParams{
+			"gpt-4o-mini": {
+				PreferredEndpoints: []string{"primary"},
+			},
+			"llama-3.1-8b-instruct": {
+				PreferredEndpoints: []string{"primary"},
+			},
+		},
 	}
 
 	apiServer := &ClassificationAPIServer{
diff --git a/src/semantic-router/pkg/config/config.go b/src/semantic-router/pkg/config/config.go
index 49b32c46..e550dde4 100644
--- a/src/semantic-router/pkg/config/config.go
+++ b/src/semantic-router/pkg/config/config.go
@@ -253,9 +253,6 @@ type VLLMEndpoint struct {
 	// Port of the vLLM endpoint
 	Port int `yaml:"port"`
 
-	// List of models served by this endpoint
-	Models []string `yaml:"models"`
-
 	// Load balancing weight for this endpoint
 	Weight int `yaml:"weight,omitempty"`
 }
@@ -604,32 +601,21 @@ func (c *RouterConfig) IsPromptGuardEnabled() bool {
 }
 
 // GetEndpointsForModel returns all endpoints that can serve the specified model
-// If the model has preferred endpoints configured, returns only those endpoints that are available
-// Otherwise, returns all endpoints that list the model in their Models array
+// Returns endpoints based on the model's preferred_endpoints configuration in model_config
 func (c *RouterConfig) GetEndpointsForModel(modelName string) []VLLMEndpoint {
-	var availableEndpoints []VLLMEndpoint
-
-	// First, find all endpoints that can serve this model
-	for _, endpoint := range c.VLLMEndpoints {
-		if slices.Contains(endpoint.Models, modelName) {
-			availableEndpoints = append(availableEndpoints, endpoint)
-		}
-	}
+	var endpoints []VLLMEndpoint
 
 	// Check if model has preferred endpoints configured
 	if modelConfig, ok := c.ModelConfig[modelName]; ok && len(modelConfig.PreferredEndpoints) > 0 {
-		var preferredEndpoints []VLLMEndpoint
-		for _, endpoint := range availableEndpoints {
-			if slices.Contains(modelConfig.PreferredEndpoints, endpoint.Name) {
-				preferredEndpoints = append(preferredEndpoints, endpoint)
+		// Return only the preferred endpoints
+		for _, endpointName := range modelConfig.PreferredEndpoints {
+			if endpoint, found := c.GetEndpointByName(endpointName); found {
+				endpoints = append(endpoints, *endpoint)
 			}
 		}
-		if len(preferredEndpoints) > 0 {
-			return preferredEndpoints
-		}
 	}
 
-	return availableEndpoints
+	return endpoints
 }
 
 // GetEndpointByName returns the endpoint with the specified name
@@ -642,18 +628,12 @@ func (c *RouterConfig) GetEndpointByName(name string) (*VLLMEndpoint, bool) {
 	return nil, false
 }
 
-// GetAllModels returns a list of all models available across all endpoints
+// GetAllModels returns a list of all models configured in model_config
 func (c *RouterConfig) GetAllModels() []string {
-	modelSet := make(map[string]bool)
 	var models []string
-	for _, endpoint := range c.VLLMEndpoints {
-		for _, model := range endpoint.Models {
-			if !modelSet[model] {
-				modelSet[model] = true
-				models = append(models, model)
-			}
-		}
+	for modelName := range c.ModelConfig {
+		models = append(models, modelName)
 	}
 
 	return models
diff --git a/src/semantic-router/pkg/config/config_test.go b/src/semantic-router/pkg/config/config_test.go
index b4028476..c7ccdc89 100644
--- a/src/semantic-router/pkg/config/config_test.go
+++ b/src/semantic-router/pkg/config/config_test.go
@@ -92,15 +92,10 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "model-a"
-      - "model-b"
     weight: 1
   - name: "endpoint2"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "model-b"
     weight: 2
 
 model_config:
@@ -177,7 +172,6 @@ tools:
 			Expect(cfg.VLLMEndpoints[0].Name).To(Equal("endpoint1"))
 			Expect(cfg.VLLMEndpoints[0].Address).To(Equal("127.0.0.1"))
 			Expect(cfg.VLLMEndpoints[0].Port).To(Equal(8000))
-			Expect(cfg.VLLMEndpoints[0].Models).To(ContainElements("model-a", "model-b"))
 			Expect(cfg.VLLMEndpoints[0].Weight).To(Equal(1))
 
 			Expect(cfg.VLLMEndpoints[1].Name).To(Equal("endpoint2"))
@@ -788,22 +782,14 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "model-a"
-      - "model-b"
     weight: 1
   - name: "endpoint2"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "model-b"
-      - "model-c"
     weight: 2
   - name: "endpoint3"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "model-a"
     weight: 1
 
 model_config:
@@ -841,13 +827,12 @@ default_model: "model-b"
 			Expect(endpointNames).To(ContainElements("endpoint1", "endpoint3"))
 		})
 
-		It("should return all available endpoints when no preferences configured", func() {
+		It("should return empty slice when no preferred endpoints configured", func() {
 			cfg, err := config.LoadConfig(configFile)
 			Expect(err).NotTo(HaveOccurred())
 
 			endpoints := cfg.GetEndpointsForModel("model-c")
-			Expect(endpoints).To(HaveLen(1))
-			Expect(endpoints[0].Name).To(Equal("endpoint2"))
+			Expect(endpoints).To(BeEmpty())
 		})
 
 		It("should return empty slice for non-existent model", func() {
@@ -858,11 +843,11 @@ default_model: "model-b"
 			Expect(endpoints).To(BeEmpty())
 		})
 
-		It("should fallback to all available endpoints if preferred endpoints don't exist", func() {
+		It("should return only preferred endpoints", func() {
 			cfg, err := config.LoadConfig(configFile)
 			Expect(err).NotTo(HaveOccurred())
 
-			// model-b has preferred endpoint2, which serves it
+			// model-b has preferred endpoint2
 			endpoints := cfg.GetEndpointsForModel("model-b")
 			Expect(endpoints).To(HaveLen(1))
 			Expect(endpoints[0].Name).To(Equal("endpoint2"))
@@ -879,7 +864,6 @@ default_model: "model-b"
 			Expect(endpoint.Name).To(Equal("endpoint1"))
 			Expect(endpoint.Address).To(Equal("127.0.0.1"))
 			Expect(endpoint.Port).To(Equal(8000))
-			Expect(endpoint.Models).To(ContainElements("model-a", "model-b"))
 		})
 
 		It("should return false when endpoint doesn't exist", func() {
@@ -893,7 +877,7 @@ default_model: "model-b"
 	})
 
 	Describe("GetAllModels", func() {
-		It("should return all unique models across endpoints", func() {
+		It("should return all models from model_config", func() {
 			cfg, err := config.LoadConfig(configFile)
 			Expect(err).NotTo(HaveOccurred())
 
@@ -908,7 +892,7 @@ default_model: "model-b"
 			cfg, err := config.LoadConfig(configFile)
 			Expect(err).NotTo(HaveOccurred())
 
-			// model-a is available on endpoint1 (weight 1) and endpoint3 (weight 1)
+			// model-a has preferred endpoints: endpoint1 (weight 1) and endpoint3 (weight 1)
 			// Since they have the same weight, it should return the first one found
 			endpointName, found := cfg.SelectBestEndpointForModel("model-a")
 			Expect(found).To(BeTrue())
@@ -924,13 +908,13 @@ default_model: "model-b"
 			Expect(endpointName).To(BeEmpty())
 		})
 
-		It("should select single endpoint when only one available", func() {
+		It("should return false when model has no preferred endpoints", func() {
 			cfg, err := config.LoadConfig(configFile)
 			Expect(err).NotTo(HaveOccurred())
 
 			endpointName, found := cfg.SelectBestEndpointForModel("model-c")
-			Expect(found).To(BeTrue())
-			Expect(endpointName).To(Equal("endpoint2"))
+			Expect(found).To(BeFalse())
+			Expect(endpointName).To(BeEmpty())
 		})
 	})
 
@@ -944,16 +928,18 @@ default_model: "model-b"
 		})
 
 		It("should fail validation when a category model has no endpoints", func() {
-			// Add a model to categories that doesn't exist in any endpoint
+			// Add a model to categories that doesn't have preferred_endpoints configured
 			configContent := `
 vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "existing-model"
     weight: 1
 
+model_config:
+  "existing-model":
+    preferred_endpoints: ["endpoint1"]
+
 categories:
   - name: "test"
     model_scores:
@@ -981,10 +967,12 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "existing-model"
     weight: 1
 
+model_config:
+  "existing-model":
+    preferred_endpoints: ["endpoint1"]
+
 default_model: "missing-default-model"
 `
 			err := os.WriteFile(configFile, []byte(configContent), 0o644)
@@ -1007,10 +995,12 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
+model_config:
+  "test-model":
+    preferred_endpoints: ["endpoint1"]
+
 categories:
   - name: "test"
     model_scores:
@@ -1034,10 +1024,12 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "::1"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
+model_config:
+  "test-model":
+    preferred_endpoints: ["endpoint1"]
+
 categories:
   - name: "test"
     model_scores:
@@ -1063,10 +1055,12 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "example.com"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
+model_config:
+  "test-model":
+    preferred_endpoints: ["endpoint1"]
+
 categories:
   - name: "test"
     model_scores:
@@ -1092,10 +1086,12 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "http://127.0.0.1"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
+model_config:
+  "test-model":
+    preferred_endpoints: ["endpoint1"]
+
 categories:
   - name: "test"
     model_scores:
@@ -1120,10 +1116,12 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1/api"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
+model_config:
+  "test-model":
+    preferred_endpoints: ["endpoint1"]
+
 categories:
   - name: "test"
     model_scores:
@@ -1147,10 +1145,12 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1:8080"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
+model_config:
+  "test-model":
+    preferred_endpoints: ["endpoint1"]
+
 categories:
   - name: "test"
     model_scores:
@@ -1175,10 +1175,12 @@ vllm_endpoints:
   - name: "test-endpoint"
     address: "https://example.com"
     port: 8000
-    models:
-      - "test-model"
     weight: 1
 
+model_config:
+  "test-model":
+    preferred_endpoints: ["test-endpoint"]
+
 categories:
   - name: "test"
     model_scores:
@@ -1212,16 +1214,18 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models:
-      - "test-model1"
     weight: 1
   - name: "endpoint2"
     address: "example.com"
     port: 8001
-    models:
-      - "test-model2"
     weight: 1
 
+model_config:
+  "test-model1":
+    preferred_endpoints: ["endpoint1"]
+  "test-model2":
+    preferred_endpoints: ["endpoint2"]
+
 categories:
   - name: "test"
     model_scores:
diff --git a/src/semantic-router/pkg/config/validation_test.go b/src/semantic-router/pkg/config/validation_test.go
index 1189c054..a3950cbb 100644
--- a/src/semantic-router/pkg/config/validation_test.go
+++ b/src/semantic-router/pkg/config/validation_test.go
@@ -186,13 +186,11 @@ var _ = Describe("IP Address Validation", func() {
 					Name:    "endpoint1",
 					Address: "127.0.0.1",
 					Port:    8000,
-					Models:  []string{"model1"},
 				},
 				{
 					Name:    "endpoint2",
 					Address: "::1",
 					Port:    8001,
-					Models:  []string{"model2"},
 				},
 			}
@@ -208,7 +206,6 @@ var _ = Describe("IP Address Validation", func() {
 					Name:    "invalid-endpoint",
 					Address: "example.com",
 					Port:    8000,
-					Models:  []string{"model1"},
 				},
 			}
@@ -228,7 +225,6 @@ var _ = Describe("IP Address Validation", func() {
 					Name:    "test-endpoint",
 					Address: "http://127.0.0.1",
 					Port:    8000,
-					Models:  []string{"model1"},
 				},
 			}
diff --git a/src/semantic-router/pkg/extproc/endpoint_selection_test.go b/src/semantic-router/pkg/extproc/endpoint_selection_test.go
index 480056c5..7339edf9 100644
--- a/src/semantic-router/pkg/extproc/endpoint_selection_test.go
+++ b/src/semantic-router/pkg/extproc/endpoint_selection_test.go
@@ -299,7 +299,6 @@ var _ = Describe("Endpoint Selection", func() {
 			Expect(endpoint1.Name).To(Equal("test-endpoint1"))
 			Expect(endpoint1.Address).To(Equal("127.0.0.1"))
 			Expect(endpoint1.Port).To(Equal(8000))
-			Expect(endpoint1.Models).To(ContainElements("model-a", "model-b"))
 			Expect(endpoint1.Weight).To(Equal(1))
 
 			// Verify second endpoint
@@ -307,7 +306,6 @@ var _ = Describe("Endpoint Selection", func() {
 			Expect(endpoint2.Name).To(Equal("test-endpoint2"))
 			Expect(endpoint2.Address).To(Equal("127.0.0.1"))
 			Expect(endpoint2.Port).To(Equal(8001))
-			Expect(endpoint2.Models).To(ContainElement("model-b"))
 			Expect(endpoint2.Weight).To(Equal(2))
 		})
diff --git a/src/semantic-router/pkg/extproc/models_endpoint_test.go b/src/semantic-router/pkg/extproc/models_endpoint_test.go
index 9fbd5d17..20192548 100644
--- a/src/semantic-router/pkg/extproc/models_endpoint_test.go
+++ b/src/semantic-router/pkg/extproc/models_endpoint_test.go
@@ -18,10 +18,17 @@ func TestHandleModelsRequest(t *testing.T) {
 				Name:    "primary",
 				Address: "127.0.0.1",
 				Port:    8000,
-				Models:  []string{"gpt-4o-mini", "llama-3.1-8b-instruct"},
 				Weight:  1,
 			},
 		},
+		ModelConfig: map[string]config.ModelParams{
+			"gpt-4o-mini": {
+				PreferredEndpoints: []string{"primary"},
+			},
+			"llama-3.1-8b-instruct": {
+				PreferredEndpoints: []string{"primary"},
+			},
+		},
 	}
 
 	router := &OpenAIRouter{
@@ -142,10 +149,14 @@ func TestHandleRequestHeadersWithModelsEndpoint(t *testing.T) {
 				Name:    "primary",
 				Address: "127.0.0.1",
 				Port:    8000,
-				Models:  []string{"gpt-4o-mini"},
 				Weight:  1,
 			},
 		},
+		ModelConfig: map[string]config.ModelParams{
+			"gpt-4o-mini": {
+				PreferredEndpoints: []string{"primary"},
+			},
+		},
 	}
 
 	router := &OpenAIRouter{
diff --git a/src/semantic-router/pkg/extproc/test_utils_test.go b/src/semantic-router/pkg/extproc/test_utils_test.go
index 3492a402..4cbc8999 100644
--- a/src/semantic-router/pkg/extproc/test_utils_test.go
+++ b/src/semantic-router/pkg/extproc/test_utils_test.go
@@ -199,14 +199,12 @@ func CreateTestConfig() *config.RouterConfig {
 				Name:    "test-endpoint1",
 				Address: "127.0.0.1",
 				Port:    8000,
-				Models:  []string{"model-a", "model-b"},
 				Weight:  1,
 			},
 			{
 				Name:    "test-endpoint2",
 				Address: "127.0.0.1",
 				Port:    8001,
-				Models:  []string{"model-b"},
 				Weight:  2,
 			},
 		},
diff --git a/website/docs/installation/configuration.md b/website/docs/installation/configuration.md
index 8c2c26f5..340ad847 100644
--- a/website/docs/installation/configuration.md
+++ b/website/docs/installation/configuration.md
@@ -159,10 +159,13 @@ Configure your LLM servers:
 vllm_endpoints:
   - name: "my_endpoint"
     address: "127.0.0.1"      # Your server IP - MUST be IP address format
-    port: 8000                # Your server port
-    models:
-      - "llama2-7b"           # Model name - must match vLLM --served-model-name
-    weight: 1                 # Load balancing weight
+    port: 8000              # Your server port
+    weight: 1               # Load balancing weight
+
+# Model configuration - maps models to endpoints
+model_config:
+  "llama2-7b":              # Model name - must match vLLM --served-model-name
+    preferred_endpoints: ["my_endpoint"]
 ```
 
 #### Address Format Requirements
@@ -204,11 +207,12 @@ The model names in the `models` array must **exactly match** the `--served-model-name` parameter:
 # vLLM server command:
 vllm serve meta-llama/Llama-2-7b-hf --served-model-name llama2-7b
 
-# config.yaml must use the same name:
-vllm_endpoints:
-  - models: ["llama2-7b"]  # ✅ Matches --served-model-name
-
+# config.yaml must reference the model in model_config:
 model_config:
+  "llama2-7b":  # ✅ Matches --served-model-name
+    preferred_endpoints: ["your-endpoint"]
+
+vllm_endpoints:
   "llama2-7b":  # ✅ Matches --served-model-name
     # ... configuration
 ```
@@ -683,12 +687,10 @@ vllm_endpoints:
   - name: "math_endpoint"
     address: "192.168.1.10"  # Math server IP
     port: 8000
-    models: ["math-model"]
     weight: 1
   - name: "general_endpoint"
     address: "192.168.1.20"  # General server IP
     port: 8000
-    models: ["general-model"]
     weight: 1
 
 categories:
@@ -711,12 +713,10 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "192.168.1.30"  # Primary server IP
     port: 8000
-    models: ["my-model"]
     weight: 2  # Higher weight = more traffic
   - name: "endpoint2"
     address: "192.168.1.31"  # Secondary server IP
     port: 8000
-    models: ["my-model"]
     weight: 1
 ```
diff --git a/website/docs/installation/installation.md b/website/docs/installation/installation.md
index a96c683b..e53e0284 100644
--- a/website/docs/installation/installation.md
+++ b/website/docs/installation/installation.md
@@ -109,13 +109,11 @@ Edit `config/config.yaml` to point to your LLM endpoints:
 vllm_endpoints:
   - name: "your-endpoint"
     address: "127.0.0.1"     # MUST be IP address (IPv4 or IPv6)
-    port: 11434              # Replace with your port
-    models:
-      - "your-model-name"    # Replace with your model
+    port: 11434            # Replace with your port
     weight: 1
 
 model_config:
-  "your-model-name":
+  "your-model-name":  # Replace with your model name
     pii_policy:
       allow_by_default: false  # Deny all PII by default
       pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]  # Only allow these specific PII types
@@ -146,11 +144,12 @@ The model name in your configuration **must exactly match** the `--served-model-name` parameter:
 # When starting vLLM server:
 vllm serve microsoft/phi-4 --port 11434 --served-model-name your-model-name
 
-# The config.yaml must use the same name:
-vllm_endpoints:
-  - models: ["your-model-name"]  # ✅ Must match --served-model-name
-
+# The config.yaml must reference the model in model_config:
 model_config:
+  "your-model-name":  # ✅ Must match --served-model-name
+    preferred_endpoints: ["your-endpoint"]
+
+vllm_endpoints:
   "your-model-name":  # ✅ Must match --served-model-name
     # ... configuration
 ```
diff --git a/website/docs/training/model-performance-eval.md b/website/docs/training/model-performance-eval.md
index ce67a205..529c5481 100644
--- a/website/docs/training/model-performance-eval.md
+++ b/website/docs/training/model-performance-eval.md
@@ -69,11 +69,9 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 11434
-    models: ["phi4"]  # ✅ Matches --served_model_name phi4
   - name: "endpoint2"
     address: "127.0.0.1"
     port: 11435
-    models: ["qwen3-0.6B"]  # ✅ Matches --served_model_name qwen3-0.6B
 
 model_config:
   "phi4":  # ✅ Matches --served_model_name phi4
diff --git a/website/docs/tutorials/content-safety/pii-detection.md b/website/docs/tutorials/content-safety/pii-detection.md
index f5acca7b..cd58ec60 100644
--- a/website/docs/tutorials/content-safety/pii-detection.md
+++ b/website/docs/tutorials/content-safety/pii-detection.md
@@ -61,11 +61,9 @@ vllm_endpoints:
   - name: secure-model
     address: "127.0.0.1"
     port: 8080
-    models: ["secure-llm"]
   - name: general-model
     address: "127.0.0.1"
     port: 8081
-    models: ["general-llm"]
 
 # Model-specific configurations
 model_config:
diff --git a/website/docs/tutorials/intelligent-route/reasoning.md b/website/docs/tutorials/intelligent-route/reasoning.md
index f9c7426b..9aa6183d 100644
--- a/website/docs/tutorials/intelligent-route/reasoning.md
+++ b/website/docs/tutorials/intelligent-route/reasoning.md
@@ -34,7 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models: ["deepseek-v31", "qwen3-30b", "openai/gpt-oss-20b"]  # Must match --served-model-name
     weight: 1
 
 # Reasoning family configurations (how to express reasoning for a family)
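Note: after this change an endpoint entry no longer lists the models it serves; the model-to-endpoint mapping lives entirely in `model_config` via `preferred_endpoints`, which `GetAllModels` and `GetEndpointsForModel` now read. The old fallback is gone: a model without `preferred_endpoints` resolves to no endpoints, as the updated config tests assert. A minimal sketch of the resulting configuration shape (the endpoint fields mirror the patched configs; the model name is a placeholder and must match the vLLM `--served-model-name`):

vllm_endpoints:
  - name: "endpoint1"
    address: "127.0.0.1"
    port: 8000
    weight: 1

model_config:
  "my-model":                 # placeholder - use your --served-model-name
    preferred_endpoints: ["endpoint1"]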