Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -364,18 +364,30 @@ Enable the trait in Package.swift:
)
```

Custom generation options for llama.cpp provide fine-grained
control over sampling parameters:
Configuration is done via custom generation options,
allowing you to control runtime parameters per request:

```swift
var options = GenerationOptions()
options[custom: LlamaLanguageModel.self] = .init(
repeatPenalty: 1.2,
repeatLastN: 128,
frequencyPenalty: 0.1,
presencePenalty: 0.1,
contextSize: 4096, // Context window size
batchSize: 512, // Batch size for evaluation
threads: 8, // Number of threads
seed: 42, // Random seed for deterministic output
temperature: 0.7, // Sampling temperature
topK: 40, // Top-K sampling
topP: 0.95, // Top-P (nucleus) sampling
repeatPenalty: 1.2, // Penalty for repeated tokens
repeatLastN: 128, // Number of tokens to consider for repeat penalty
frequencyPenalty: 0.1, // Frequency-based penalty
presencePenalty: 0.1, // Presence-based penalty
mirostat: .v2(tau: 5.0, eta: 0.1) // Adaptive perplexity control
)

let response = try await session.respond(
to: "Write a story",
options: options
)
```

> [!NOTE]
Expand Down
79 changes: 61 additions & 18 deletions Sources/AnyLanguageModel/Models/LlamaLanguageModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ import Foundation

/// The context size for the model.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead:
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(contextSize: 4096)
Expand All @@ -204,57 +204,91 @@ import Foundation

/// The batch size for processing.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(batchSize: 1024)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var batchSize: UInt32 { legacyDefaults.batchSize }

/// The number of threads to use.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(threads: 8)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var threads: Int32 { legacyDefaults.threads }

/// The random seed for generation.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(seed: 42)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var seed: UInt32 { legacyDefaults.seed }

/// The temperature for sampling.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(temperature: 0.6)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var temperature: Float { legacyDefaults.temperature }

/// The top-K sampling parameter.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(topK: 25)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var topK: Int32 { legacyDefaults.topK }

/// The top-P (nucleus) sampling parameter.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(topP: 0.9)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var topP: Float { legacyDefaults.topP }

/// The repeat penalty for generation.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(repeatPenalty: 1.2)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var repeatPenalty: Float { legacyDefaults.repeatPenalty }

/// The number of tokens to consider for repeat penalty.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(repeatLastN: 128)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var repeatLastN: Int32 { legacyDefaults.repeatLastN }

Expand Down Expand Up @@ -397,9 +431,18 @@ import Foundation

/// Creates a Llama language model using legacy parameter defaults.
///
/// - Important: This initializer is deprecated. Use
/// `init(modelPath:)` and configure per-request values via
/// - Important: This initializer is deprecated.
/// Use `init(modelPath:)` and configure per-request values via
/// ``GenerationOptions`` custom options instead.
///
/// ```swift
/// let model = LlamaLanguageModel(modelPath: "/path/to/model.gguf")
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(contextSize: 4096)
///
/// let session = LanguageModelSession(model: model)
/// let response = try await session.respond(to: "Hello, world!", options: options)
/// ```
@available(
*,
deprecated,
Expand Down