diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index d643504a..d4f7332b 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -10,7 +10,7 @@ on:
 jobs:
   pre-commit:
     runs-on: ubuntu-latest
-    name: Run pre-commit hooks on Go, Rust, JavaScripts, Markdown and Python files
+    name: Run pre-commit hooks on Go, Rust, JavaScript, Markdown, YAML and Python files
 
     steps:
     - name: Check out the repo
@@ -47,6 +47,7 @@ jobs:
           build-essential \
           pkg-config
         npm install -g markdownlint-cli
+        pip install --user yamllint
 
     - name: Cache Rust dependencies
       uses: actions/cache@v4
@@ -82,10 +83,10 @@ jobs:
     - name: Install pre-commit
       run: pip install pre-commit
 
-    - name: Run pre-commit on Go, Rust, JavaScript, Markdown and Python files
+    - name: Run pre-commit on Go, Rust, JavaScript, Markdown, YAML and Python files
       run: |
         # Find all Go, Rust, JavaScripts, Markdown and Python files (excluding vendored/generated code)
-        FILES=$(find . -type f \( -name "*.go" -o -name "*.rs" -o -name "*.py" -o -name "*.js" -o -name "*.md" \) \
+        FILES=$(find . -type f \( -name "*.go" -o -name "*.rs" -o -name "*.py" -o -name "*.js" -o -name "*.md" -o -name "*.yaml" -o -name "*.yml" \) \
           ! -path "./target/*" \
           ! -path "./candle-binding/target/*" \
           ! -path "./.git/*" \
@@ -100,7 +101,7 @@ jobs:
           echo "Running pre-commit on files: $FILES"
           pre-commit run --files $FILES
         else
-          echo "No Go, Rust, JavaScript, Markdown or Python files found to check"
+          echo "No Go, Rust, JavaScript, Markdown, YAML, or Python files found to check"
         fi
 
     - name: Show pre-commit results
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 38d36c9a..929f1ed5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,79 +2,89 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 # Basic hooks for Go, Rust, Python And JavaScript files only
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v6.0.0
-  hooks:
-  - id: trailing-whitespace
-    files: \.(go|rs|py|js)$
-  - id: end-of-file-fixer
-    files: \.(go|rs|py|js)$
-  - id: check-added-large-files
-    args: ['--maxkb=500']
-    files: \.(go|rs|py|js)$
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v6.0.0
+  hooks:
+    - id: trailing-whitespace
+      files: \.(go|rs|py|js)$
+    - id: end-of-file-fixer
+      files: \.(go|rs|py|js)$
+    - id: check-added-large-files
+      args: ['--maxkb=500']
+      files: \.(go|rs|py|js)$
 
 # Go specific hooks
-- repo: local
-  hooks:
-  - id: go-fmt
-    name: go fmt
-    entry: gofmt -w
-    language: system
-    files: \.go$
+- repo: local
+  hooks:
+    - id: go-fmt
+      name: go fmt
+      entry: gofmt -w
+      language: system
+      files: \.go$
 
 # Markdown specific hooks
-- repo: local
-  hooks:
-  - id: md-fmt
-    name: md fmt
-    entry: bash -c "make markdown-lint"
-    language: system
-    files: \.md$
-    exclude: ^(\node_modules/)
+- repo: local
+  hooks:
+    - id: md-fmt
+      name: md fmt
+      entry: bash -c "make markdown-lint"
+      language: system
+      files: \.md$
+      exclude: ^(\node_modules/)
+
+# YAML specific hooks
+- repo: local
+  hooks:
+    - id: yaml-and-yml-fmt
+      name: yaml/yml fmt
+      entry: bash -c "make yaml-lint"
+      language: system
+      files: \.(yaml|yml)$
+      exclude: ^(\node_modules/)
 
 # JavaScript specific hooks
-- repo: local
-  hooks:
-  - id: js-lint
-    name: js lint
-    entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
-    language: system
-    files: \.js$
-    exclude: ^(\node_modules/)
-    pass_filenames: false
+- repo: local
+  hooks:
+    - id: js-lint
+      name: js lint
+      entry: bash -c 'cd website && npm install 2>/dev/null || true && npm run lint'
+      language: system
+      files: \.js$
+      exclude: ^(\node_modules/)
+      pass_filenames: false
 
-# Rust specific hooks
-- repo: local
-  hooks:
-  - id: cargo-fmt
-    name: cargo fmt
-    entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
-    language: system
-    files: \.rs$
-    pass_filenames: false
-  - id: cargo-check
-    name: cargo check
-    entry: bash -c 'cd candle-binding && cargo check'
-    language: system
-    files: \.rs$
-    pass_filenames: false
+# Rust specific hooks
+- repo: local
+  hooks:
+    - id: cargo-fmt
+      name: cargo fmt
+      entry: bash -c 'cd candle-binding && rustup component add rustfmt 2>/dev/null || true && cargo fmt'
+      language: system
+      files: \.rs$
+      pass_filenames: false
+    - id: cargo-check
+      name: cargo check
+      entry: bash -c 'cd candle-binding && cargo check'
+      language: system
+      files: \.rs$
+      pass_filenames: false
 
 # Python specific hooks
-- repo: https://github.com/psf/black
-  rev: 25.1.0
-  hooks:
-  - id: black
-    language_version: python3
-    files: \.py$
-    exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
+- repo: https://github.com/psf/black
+  rev: 25.1.0
+  hooks:
+    - id: black
+      language_version: python3
+      files: \.py$
+      exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 
-- repo: https://github.com/PyCQA/isort
-  rev: 6.0.1
-  hooks:
-  - id: isort
-    args: ["--profile", "black"]
-    files: \.py$
-    exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
+- repo: https://github.com/PyCQA/isort
+  rev: 6.0.1
+  hooks:
+    - id: isort
+      args: ["--profile", "black"]
+      files: \.py$
+      exclude: ^(\.venv/|venv/|env/|__pycache__/|\.git/|site/)
 
 # Commented out flake8 - only reports issues, doesn't auto-fix
 # - repo: https://github.com/PyCQA/flake8
diff --git a/.yamllint b/.yamllint
new file mode 100644
index 00000000..4c7aa56f
--- /dev/null
+++ b/.yamllint
@@ -0,0 +1,57 @@
+ignore: |
+  # Directories skipped entirely: VCS metadata
+  # (.git), GitHub workflow and config files
+  # (.github), and vendored JavaScript
+  # dependencies (node_modules) are not linted.
+  .git
+  .github
+  node_modules
+
+rules:
+  braces:
+    min-spaces-inside: 0
+    max-spaces-inside: 0
+    min-spaces-inside-empty: -1
+    max-spaces-inside-empty: -1
+  brackets:
+    min-spaces-inside: 0
+    max-spaces-inside: 1
+    min-spaces-inside-empty: -1
+    max-spaces-inside-empty: -1
+  colons:
+    max-spaces-before: 0
+    max-spaces-after: 1
+  commas:
+    max-spaces-before: 1
+    min-spaces-after: 1
+    max-spaces-after: 1
+  comments:
+    level: warning
+    require-starting-space: true
+    min-spaces-from-content: 2
+  comments-indentation:
+    level: warning
+  document-end: disable
+  document-start: disable
+  empty-lines:
+    max: 2
+    max-start: 0
+    max-end: 1
+  empty-values:
+    forbid-in-block-mappings: false
+    forbid-in-flow-mappings: true
+  hyphens:
+    max-spaces-after: 1
+  indentation:
+    spaces: 2
+    indent-sequences: consistent
+    check-multi-line-strings: false
+  key-duplicates: enable
+  key-ordering: disable
+  new-line-at-end-of-file: enable
+  new-lines:
+    type: unix
+  trailing-spaces: enable
+  truthy:
+    check-keys: false
+    level: warning
diff --git a/Makefile b/Makefile
index e148ed67..026ed8c1 100644
--- a/Makefile
+++ b/Makefile
@@ -387,3 +387,7 @@ markdown-lint:
 markdown-lint-fix:
 	@echo "Fixing markdown lint issues..."
 	markdownlint -c markdownlint.yaml "**/*.md" --ignore node_modules --ignore website/node_modules --fix
+
+yaml-lint:
+	@echo "Linting YAML files..."
+	yamllint --config-file=.yamllint .
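
Note: the .yamllint rules above encode the same conventions the rest of this PR applies by hand. A minimal sketch (not part of the PR) of YAML that satisfies the configured rules:

    # 2-space indentation; sequence indenting may vary per file but must be consistent
    on:                        # truthy "check-keys: false" leaves GitHub-workflow "on:" keys alone
      push:
        branches: [main, dev]  # brackets allow 0-1 spaces inside; commas need one trailing space
    env: {}                    # empty flow mapping: no spaces inside braces

Trailing spaces, duplicate keys, or a missing final newline would all fail (trailing-spaces, key-duplicates, and new-line-at-end-of-file are enabled). `make yaml-lint` runs these checks over the whole tree, and the yaml-and-yml-fmt pre-commit hook triggers the same target when .yaml/.yml files change.
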
diff --git a/config/cache/milvus.yaml b/config/cache/milvus.yaml
index bac19b4d..0838c4e7 100644
--- a/config/cache/milvus.yaml
+++ b/config/cache/milvus.yaml
@@ -1,171 +1,170 @@
-# Milvus Vector Database Configuration for Semantic Cache
-#
-# This configuration file contains settings for using Milvus as the semantic cache backend.
-# To use this configuration:
-# 1. Set backend_type: "milvus" in your main config.yaml
-# 2. Set backend_config_path: "config/cache/milvus.yaml" in your main config.yaml
-# 3. Ensure Milvus server is running and accessible
-# 4. Build with Milvus support: go build -tags=milvus
-
-# Milvus connection settings
-connection:
-  # Milvus server host (change for production deployment)
-  host: "localhost"  # For production: use your Milvus cluster endpoint
-
-  # Milvus server port
-  port: 19530  # Standard Milvus port
-
-  # Database name (optional, defaults to "default")
-  database: "semantic_router_cache"
-
-  # Connection timeout in seconds
-  timeout: 30
-
-  # Authentication (enable for production)
-  auth:
-    enabled: false  # Set to true for production
-    username: ""  # Your Milvus username
-    password: ""  # Your Milvus password
-
-  # TLS/SSL configuration (recommended for production)
-  tls:
-    enabled: false  # Set to true for secure connections
-    cert_file: ""  # Path to client certificate
-    key_file: ""  # Path to client private key
-    ca_file: ""  # Path to CA certificate
-
-# Collection settings
-collection:
-  # Name of the collection to store cache entries
-  name: "semantic_cache"
-
-  # Description of the collection
-  description: "Semantic cache for LLM request-response pairs"
-
-  # Vector field configuration
-  vector_field:
-    # Name of the vector field
-    name: "embedding"
-
-    # Dimension of the embeddings (auto-detected from model at runtime)
-    dimension: 384  # This value is ignored - dimension is auto-detected from the embedding model
-
-    # Metric type for similarity calculation
-    metric_type: "IP"  # Inner Product (cosine similarity for normalized vectors)
-
-  # Index configuration for the vector field
-  index:
-    # Index type (HNSW is recommended for most use cases)
-    type: "HNSW"
-
-    # Index parameters
-    params:
-      M: 16  # Number of bi-directional links for each node
-      efConstruction: 64  # Search scope during index construction
-
-# Search configuration
-search:
-  # Search parameters
-  params:
-    ef: 64  # Search scope during search (should be >= topk)
-
-  # Number of top results to retrieve for similarity comparison
-  topk: 10
-
-  # Consistency level for search operations
-  consistency_level: "Session"  # Options: Strong, Session, Bounded, Eventually
-
-# Performance and resource settings
-performance:
-  # Connection pool settings
-  connection_pool:
-    # Maximum number of connections in the pool
-    max_connections: 10
-
-    # Maximum idle connections
-    max_idle_connections: 5
-
-    # Connection timeout for acquiring from pool
-    acquire_timeout: 5
-
-  # Batch operation settings
-  batch:
-    # Maximum batch size for insert operations
-    insert_batch_size: 1000
-
-    # Batch timeout in seconds
-    timeout: 30
-
-# Data management
-data_management:
-  # Automatic data expiration (TTL) settings
-  ttl:
-    # Enable automatic TTL-based cleanup (requires TTL to be set in main config)
-    enabled: true
-
-    # Field name to store timestamp for TTL calculation
-    timestamp_field: "timestamp"
-
-    # Cleanup interval in seconds (how often to run cleanup)
-    cleanup_interval: 3600  # 1 hour
-
-  # Compaction settings
-  compaction:
-    # Enable automatic compaction
-    enabled: true
-
-    # Compaction interval in seconds
-    interval: 86400  # 24 hours
-
-# Logging and monitoring
-logging:
-  # Log level for Milvus client operations (debug, info, warn, error)
-  level: "info"
-
-  # Enable query/search logging for debugging
-  enable_query_log: false
-
-  # Enable performance metrics collection
-  enable_metrics: true
-
-# Development and debugging settings
-development:
-  # Drop collection on startup (WARNING: This will delete all cached data)
-  drop_collection_on_startup: true  # Enable for development to test dynamic dimensions
-
-  # Create collection if it doesn't exist
-  auto_create_collection: true
-
-  # Print detailed error messages
-  verbose_errors: true
-
-# Example configurations for different environments:
-#
-# Local Development (Docker):
-#   connection:
-#     host: "localhost"
-#     port: 19530
-#   auth:
-#     enabled: false
-#   development:
-#     drop_collection_on_startup: true  # Clean start for development
-#
-# Production (Zilliz Cloud):
-#   connection:
-#     host: "your-cluster-endpoint.zillizcloud.com"
-#     port: 443
-#   auth:
-#     enabled: true
-#     username: "your-username"
-#     password: "your-password"
-#   tls:
-#     enabled: true
-#   development:
-#     drop_collection_on_startup: false
-#     auto_create_collection: false  # Pre-create collections in production
-#
-# Kubernetes Deployment:
-#   connection:
-#     host: "milvus-service.milvus-system.svc.cluster.local"
-#     port: 19530
-#     timeout: 60  # Longer timeout for cluster environments
+# Milvus Vector Database Configuration for Semantic Cache
+# This configuration file contains settings for using Milvus as the semantic cache backend.
+# To use this configuration:
+# 1. Set backend_type: "milvus" in your main config.yaml
+# 2. Set backend_config_path: "config/cache/milvus.yaml" in your main config.yaml
+# 3. Ensure Milvus server is running and accessible
+# 4. Build with Milvus support: go build -tags=milvus
+
+# Milvus connection settings
+connection:
+  # Milvus server host (change for production deployment)
+  host: "localhost"  # For production: use your Milvus cluster endpoint
+
+  # Milvus server port
+  port: 19530  # Standard Milvus port
+
+  # Database name (optional, defaults to "default")
+  database: "semantic_router_cache"
+
+  # Connection timeout in seconds
+  timeout: 30
+
+  # Authentication (enable for production)
+  auth:
+    enabled: false  # Set to true for production
+    username: ""  # Your Milvus username
+    password: ""  # Your Milvus password
+
+  # TLS/SSL configuration (recommended for production)
+  tls:
+    enabled: false  # Set to true for secure connections
+    cert_file: ""  # Path to client certificate
+    key_file: ""  # Path to client private key
+    ca_file: ""  # Path to CA certificate
+
+# Collection settings
+collection:
+  # Name of the collection to store cache entries
+  name: "semantic_cache"
+
+  # Description of the collection
+  description: "Semantic cache for LLM request-response pairs"
+
+  # Vector field configuration
+  vector_field:
+    # Name of the vector field
+    name: "embedding"
+
+    # Dimension of the embeddings (auto-detected from model at runtime)
+    dimension: 384  # This value is ignored - dimension is auto-detected from the embedding model
+
+    # Metric type for similarity calculation
+    metric_type: "IP"  # Inner Product (cosine similarity for normalized vectors)
+
+  # Index configuration for the vector field
+  index:
+    # Index type (HNSW is recommended for most use cases)
+    type: "HNSW"
+
+    # Index parameters
+    params:
+      M: 16  # Number of bi-directional links for each node
+      efConstruction: 64  # Search scope during index construction
+
+# Search configuration
+search:
+  # Search parameters
+  params:
+    ef: 64  # Search scope during search (should be >= topk)
+
+  # Number of top results to retrieve for similarity comparison
+  topk: 10
+
+  # Consistency level for search operations
+  consistency_level: "Session"  # Options: Strong, Session, Bounded, Eventually
+
+# Performance and resource settings
+performance:
+  # Connection pool settings
+  connection_pool:
+    # Maximum number of connections in the pool
+    max_connections: 10
+
+    # Maximum idle connections
+    max_idle_connections: 5
+
+    # Connection timeout for acquiring from pool
+    acquire_timeout: 5
+
+  # Batch operation settings
+  batch:
+    # Maximum batch size for insert operations
+    insert_batch_size: 1000
+
+    # Batch timeout in seconds
+    timeout: 30
+
+# Data management
+data_management:
+  # Automatic data expiration (TTL) settings
+  ttl:
+    # Enable automatic TTL-based cleanup (requires TTL to be set in main config)
+    enabled: true
+
+    # Field name to store timestamp for TTL calculation
+    timestamp_field: "timestamp"
+
+    # Cleanup interval in seconds (how often to run cleanup)
+    cleanup_interval: 3600  # 1 hour
+
+  # Compaction settings
+  compaction:
+    # Enable automatic compaction
+    enabled: true
+
+    # Compaction interval in seconds
+    interval: 86400  # 24 hours
+
+# Logging and monitoring
+logging:
+  # Log level for Milvus client operations (debug, info, warn, error)
+  level: "info"
+
+  # Enable query/search logging for debugging
+  enable_query_log: false
+
+  # Enable performance metrics collection
+  enable_metrics: true
+
+# Development and debugging settings
+development:
+  # Drop collection on startup (WARNING: This will delete all cached data)
+  drop_collection_on_startup: true  # Enable for development to test dynamic dimensions
+
+  # Create collection if it doesn't exist
+  auto_create_collection: true
+
+  # Print detailed error messages
+  verbose_errors: true
+
+# Example configurations for different environments:
+#
+# Local Development (Docker):
+#   connection:
+#     host: "localhost"
+#     port: 19530
+#   auth:
+#     enabled: false
+#   development:
+#     drop_collection_on_startup: true  # Clean start for development
+#
+# Production (Zilliz Cloud):
+#   connection:
+#     host: "your-cluster-endpoint.zillizcloud.com"
+#     port: 443
+#   auth:
+#     enabled: true
+#     username: "your-username"
+#     password: "your-password"
+#   tls:
+#     enabled: true
+#   development:
+#     drop_collection_on_startup: false
+#     auto_create_collection: false  # Pre-create collections in production
+#
+# Kubernetes Deployment:
+#   connection:
+#     host: "milvus-service.milvus-system.svc.cluster.local"
+#     port: 19530
+#     timeout: 60  # Longer timeout for cluster environments
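
Note: per the header comments in this file, switching the semantic cache from the default in-memory backend to Milvus is a two-key change in the main config plus a tagged build. A minimal sketch (keys as documented above):

    # config/config.yaml
    semantic_cache:
      backend_type: "milvus"
      backend_config_path: "config/cache/milvus.yaml"
      similarity_threshold: 0.8
      ttl_seconds: 3600  # pairs with the TTL cleanup settings under data_management above

    # then build with Milvus support:
    #   go build -tags=milvus
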
reasoning_description: "Business content is typically conversational" - reasoning_effort: low # Business conversations need low reasoning effort - model_scores: - - model: phi4 - score: 0.8 - - model: gemma3:27b - score: 0.4 - - model: mistral-small3.1 - score: 0.2 -- name: law - use_reasoning: false - reasoning_description: "Legal content is typically explanatory" - model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.6 - - model: mistral-small3.1 - score: 0.4 -- name: psychology - use_reasoning: false - reasoning_description: "Psychology content is usually explanatory" - model_scores: - - model: mistral-small3.1 - score: 0.6 - - model: gemma3:27b - score: 0.4 - - model: phi4 - score: 0.4 -- name: biology - use_reasoning: true - reasoning_description: "Biological processes benefit from structured analysis" - model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 - - model: phi4 - score: 0.2 -- name: chemistry - use_reasoning: true - reasoning_description: "Chemical reactions and formulas require systematic thinking" - reasoning_effort: high # Chemistry requires high reasoning effort - model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 - - model: phi4 - score: 0.6 -- name: history - use_reasoning: false - reasoning_description: "Historical content is narrative-based" - model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: phi4 - score: 0.6 - - model: gemma3:27b - score: 0.4 -- name: other - use_reasoning: false - reasoning_description: "General content doesn't require reasoning" - model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.6 - - model: mistral-small3.1 - score: 0.6 -- name: health - use_reasoning: false - reasoning_description: "Health information is typically informational" - model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.8 - - model: mistral-small3.1 - score: 0.6 -- name: economics - use_reasoning: false - reasoning_description: "Economic discussions are usually explanatory" - model_scores: - - model: gemma3:27b - score: 0.8 - - model: mistral-small3.1 - score: 0.8 - - model: phi4 - score: 0.0 -- name: math - use_reasoning: true - reasoning_description: "Mathematical problems require step-by-step reasoning" - reasoning_effort: high # Math problems need high reasoning effort - model_scores: - - model: phi4 - score: 1.0 - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 -- name: physics - use_reasoning: true - reasoning_description: "Physics concepts need logical analysis" - model_scores: - - model: gemma3:27b - score: 0.4 - - model: phi4 - score: 0.4 - - model: mistral-small3.1 - score: 0.4 -- name: computer science - use_reasoning: true - reasoning_description: "Programming and algorithms need logical reasoning" - model_scores: - - model: gemma3:27b - score: 0.6 - - model: mistral-small3.1 - score: 0.6 - - model: phi4 - score: 0.0 -- name: philosophy - use_reasoning: false - reasoning_description: "Philosophical discussions are conversational" - model_scores: - - model: phi4 - score: 0.6 - - model: gemma3:27b - score: 0.2 - - model: mistral-small3.1 - score: 0.2 -- name: engineering - use_reasoning: true - reasoning_description: "Engineering problems require systematic problem-solving" - model_scores: - - model: gemma3:27b - score: 0.6 - - model: mistral-small3.1 - score: 0.6 - - model: phi4 - score: 0.2 + - name: business + use_reasoning: false + reasoning_description: "Business content is 
typically conversational" + reasoning_effort: low # Business conversations need low reasoning effort + model_scores: + - model: phi4 + score: 0.8 + - model: gemma3:27b + score: 0.4 + - model: mistral-small3.1 + score: 0.2 + - name: law + use_reasoning: false + reasoning_description: "Legal content is typically explanatory" + model_scores: + - model: gemma3:27b + score: 0.8 + - model: phi4 + score: 0.6 + - model: mistral-small3.1 + score: 0.4 + - name: psychology + use_reasoning: false + reasoning_description: "Psychology content is usually explanatory" + model_scores: + - model: mistral-small3.1 + score: 0.6 + - model: gemma3:27b + score: 0.4 + - model: phi4 + score: 0.4 + - name: biology + use_reasoning: true + reasoning_description: "Biological processes benefit from structured analysis" + model_scores: + - model: mistral-small3.1 + score: 0.8 + - model: gemma3:27b + score: 0.6 + - model: phi4 + score: 0.2 + - name: chemistry + use_reasoning: true + reasoning_description: "Chemical reactions and formulas require systematic thinking" + reasoning_effort: high # Chemistry requires high reasoning effort + model_scores: + - model: mistral-small3.1 + score: 0.8 + - model: gemma3:27b + score: 0.6 + - model: phi4 + score: 0.6 + - name: history + use_reasoning: false + reasoning_description: "Historical content is narrative-based" + model_scores: + - model: mistral-small3.1 + score: 0.8 + - model: phi4 + score: 0.6 + - model: gemma3:27b + score: 0.4 + - name: other + use_reasoning: false + reasoning_description: "General content doesn't require reasoning" + model_scores: + - model: gemma3:27b + score: 0.8 + - model: phi4 + score: 0.6 + - model: mistral-small3.1 + score: 0.6 + - name: health + use_reasoning: false + reasoning_description: "Health information is typically informational" + model_scores: + - model: gemma3:27b + score: 0.8 + - model: phi4 + score: 0.8 + - model: mistral-small3.1 + score: 0.6 + - name: economics + use_reasoning: false + reasoning_description: "Economic discussions are usually explanatory" + model_scores: + - model: gemma3:27b + score: 0.8 + - model: mistral-small3.1 + score: 0.8 + - model: phi4 + score: 0.0 + - name: math + use_reasoning: true + reasoning_description: "Mathematical problems require step-by-step reasoning" + reasoning_effort: high # Math problems need high reasoning effort + model_scores: + - model: phi4 + score: 1.0 + - model: mistral-small3.1 + score: 0.8 + - model: gemma3:27b + score: 0.6 + - name: physics + use_reasoning: true + reasoning_description: "Physics concepts need logical analysis" + model_scores: + - model: gemma3:27b + score: 0.4 + - model: phi4 + score: 0.4 + - model: mistral-small3.1 + score: 0.4 + - name: computer science + use_reasoning: true + reasoning_description: "Programming and algorithms need logical reasoning" + model_scores: + - model: gemma3:27b + score: 0.6 + - model: mistral-small3.1 + score: 0.6 + - model: phi4 + score: 0.0 + - name: philosophy + use_reasoning: false + reasoning_description: "Philosophical discussions are conversational" + model_scores: + - model: phi4 + score: 0.6 + - model: gemma3:27b + score: 0.2 + - model: mistral-small3.1 + score: 0.2 + - name: engineering + use_reasoning: true + reasoning_description: "Engineering problems require systematic problem-solving" + model_scores: + - model: gemma3:27b + score: 0.6 + - model: mistral-small3.1 + score: 0.6 + - model: phi4 + score: 0.2 + default_model: mistral-small3.1 # API Configuration @@ -292,18 +293,18 @@ reasoning_families: deepseek: type: 
"chat_template_kwargs" parameter: "thinking" - + qwen3: type: "chat_template_kwargs" parameter: "enable_thinking" - + gpt-oss: type: "reasoning_effort" parameter: "reasoning_effort" - + gpt: type: "reasoning_effort" parameter: "reasoning_effort" # Global default reasoning effort level -default_reasoning_effort: medium # Default reasoning effort level (low, medium, high) +default_reasoning_effort: medium # Default reasoning effort level (low, medium, high) diff --git a/deploy/kubernetes/config.yaml b/deploy/kubernetes/config.yaml index 358fb0cd..226834e7 100644 --- a/deploy/kubernetes/config.yaml +++ b/deploy/kubernetes/config.yaml @@ -67,7 +67,7 @@ model_config: # Classifier configuration for text classification classifier: category_model: - model_id: "models/category_classifier_modernbert-base_model" #TODO: Use local model for now before the code can download the entire model from huggingface + model_id: "models/category_classifier_modernbert-base_model" # TODO: Use local model for now before the code can download the entire model from huggingface use_modernbert: true threshold: 0.6 use_cpu: true @@ -79,116 +79,117 @@ classifier: use_cpu: true pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" categories: -- name: business - model_scores: - - model: phi4 - score: 0.8 - - model: gemma3:27b - score: 0.4 - - model: mistral-small3.1 - score: 0.2 -- name: law - model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.6 - - model: mistral-small3.1 - score: 0.4 -- name: psychology - model_scores: - - model: mistral-small3.1 - score: 0.6 - - model: gemma3:27b - score: 0.4 - - model: phi4 - score: 0.4 -- name: biology - model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 - - model: phi4 - score: 0.2 -- name: chemistry - model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 - - model: phi4 - score: 0.6 -- name: history - model_scores: - - model: mistral-small3.1 - score: 0.8 - - model: phi4 - score: 0.6 - - model: gemma3:27b - score: 0.4 -- name: other - model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.6 - - model: mistral-small3.1 - score: 0.6 -- name: health - model_scores: - - model: gemma3:27b - score: 0.8 - - model: phi4 - score: 0.8 - - model: mistral-small3.1 - score: 0.6 -- name: economics - model_scores: - - model: gemma3:27b - score: 0.8 - - model: mistral-small3.1 - score: 0.8 - - model: phi4 - score: 0.0 -- name: math - model_scores: - - model: phi4 - score: 1.0 - - model: mistral-small3.1 - score: 0.8 - - model: gemma3:27b - score: 0.6 -- name: physics - model_scores: - - model: gemma3:27b - score: 0.4 - - model: phi4 - score: 0.4 - - model: mistral-small3.1 - score: 0.4 -- name: computer science - model_scores: - - model: gemma3:27b - score: 0.6 - - model: mistral-small3.1 - score: 0.6 - - model: phi4 - score: 0.0 -- name: philosophy - model_scores: - - model: phi4 - score: 0.6 - - model: gemma3:27b - score: 0.2 - - model: mistral-small3.1 - score: 0.2 -- name: engineering - model_scores: - - model: gemma3:27b - score: 0.6 - - model: mistral-small3.1 - score: 0.6 - - model: phi4 - score: 0.2 -default_model: mistral-small3.1 \ No newline at end of file + - name: business + model_scores: + - model: phi4 + score: 0.8 + - model: gemma3:27b + score: 0.4 + - model: mistral-small3.1 + score: 0.2 + - name: law + model_scores: + - model: gemma3:27b + score: 0.8 + - model: phi4 + score: 0.6 + - model: mistral-small3.1 + score: 
0.4 + - name: psychology + model_scores: + - model: mistral-small3.1 + score: 0.6 + - model: gemma3:27b + score: 0.4 + - model: phi4 + score: 0.4 + - name: biology + model_scores: + - model: mistral-small3.1 + score: 0.8 + - model: gemma3:27b + score: 0.6 + - model: phi4 + score: 0.2 + - name: chemistry + model_scores: + - model: mistral-small3.1 + score: 0.8 + - model: gemma3:27b + score: 0.6 + - model: phi4 + score: 0.6 + - name: history + model_scores: + - model: mistral-small3.1 + score: 0.8 + - model: phi4 + score: 0.6 + - model: gemma3:27b + score: 0.4 + - name: other + model_scores: + - model: gemma3:27b + score: 0.8 + - model: phi4 + score: 0.6 + - model: mistral-small3.1 + score: 0.6 + - name: health + model_scores: + - model: gemma3:27b + score: 0.8 + - model: phi4 + score: 0.8 + - model: mistral-small3.1 + score: 0.6 + - name: economics + model_scores: + - model: gemma3:27b + score: 0.8 + - model: mistral-small3.1 + score: 0.8 + - model: phi4 + score: 0.0 + - name: math + model_scores: + - model: phi4 + score: 1.0 + - model: mistral-small3.1 + score: 0.8 + - model: gemma3:27b + score: 0.6 + - name: physics + model_scores: + - model: gemma3:27b + score: 0.4 + - model: phi4 + score: 0.4 + - model: mistral-small3.1 + score: 0.4 + - name: computer science + model_scores: + - model: gemma3:27b + score: 0.6 + - model: mistral-small3.1 + score: 0.6 + - model: phi4 + score: 0.0 + - name: philosophy + model_scores: + - model: phi4 + score: 0.6 + - model: gemma3:27b + score: 0.2 + - model: mistral-small3.1 + score: 0.2 + - name: engineering + model_scores: + - model: gemma3:27b + score: 0.6 + - model: mistral-small3.1 + score: 0.6 + - model: phi4 + score: 0.2 + +default_model: mistral-small3.1 diff --git a/deploy/kubernetes/deployment.yaml b/deploy/kubernetes/deployment.yaml index c0564a6b..45ab8e98 100644 --- a/deploy/kubernetes/deployment.yaml +++ b/deploy/kubernetes/deployment.yaml @@ -23,10 +23,10 @@ spec: set -e echo "Installing Hugging Face CLI..." pip install --no-cache-dir huggingface_hub[cli] - + echo "Downloading models to persistent volume..." cd /app/models - + # Download category classifier model if [ ! -d "category_classifier_modernbert-base_model" ]; then echo "Downloading category classifier model..." @@ -34,15 +34,15 @@ spec: else echo "Category classifier model already exists, skipping..." fi - - # Download PII classifier model + + # Download PII classifier model if [ ! -d "pii_classifier_modernbert-base_model" ]; then echo "Downloading PII classifier model..." huggingface-cli download LLM-Semantic-Router/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model else echo "PII classifier model already exists, skipping..." fi - + # Download jailbreak classifier model if [ ! -d "jailbreak_classifier_modernbert-base_model" ]; then echo "Downloading jailbreak classifier model..." @@ -50,7 +50,7 @@ spec: else echo "Jailbreak classifier model already exists, skipping..." fi - + # Download PII token classifier model if [ ! -d "pii_classifier_modernbert-base_presidio_token_model" ]; then echo "Downloading PII token classifier model..." @@ -58,7 +58,7 @@ spec: else echo "PII token classifier model already exists, skipping..." fi - + echo "All models downloaded successfully!" 
ls -la /app/models/ env: diff --git a/deploy/kubernetes/kustomization.yaml b/deploy/kubernetes/kustomization.yaml index 90bd3006..8160564b 100644 --- a/deploy/kubernetes/kustomization.yaml +++ b/deploy/kubernetes/kustomization.yaml @@ -21,6 +21,5 @@ configMapGenerator: namespace: semantic-router images: - - name: ghcr.io/vllm-project/semantic-router/extproc - newTag: latest - +- name: ghcr.io/vllm-project/semantic-router/extproc + newTag: latest diff --git a/docker-compose.yml b/docker-compose.yml index 2b34eaf8..09f7b9ad 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,7 @@ services: networks: - semantic-network healthcheck: - test: ["CMD", "curl","-f", "localhost:8080/health"] + test: ["CMD", "curl", "-f", "localhost:8080/health"] interval: 10s timeout: 5s retries: 5
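
Note: the docker-compose change above is purely stylistic — "curl","-f" was already two separate array elements; yamllint's commas rule (min-spaces-after: 1) simply requires a space after each comma. For reference, a slightly fuller exec-form healthcheck; start_period is a standard Compose healthcheck field, added here only as an illustration, not part of this PR:

    healthcheck:
      test: ["CMD", "curl", "-f", "localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 15s  # grace period before failed probes count toward retries
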