Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -364,18 +364,30 @@ Enable the trait in Package.swift:
)
```

Custom generation options for llama.cpp provide fine-grained
control over sampling parameters:
Configuration is done via custom generation options,
allowing you to control runtime parameters per request:

```swift
var options = GenerationOptions()
options[custom: LlamaLanguageModel.self] = .init(
repeatPenalty: 1.2,
repeatLastN: 128,
frequencyPenalty: 0.1,
presencePenalty: 0.1,
contextSize: 4096, // Context window size
batchSize: 512, // Batch size for evaluation
threads: 8, // Number of threads
seed: 42, // Random seed for deterministic output
temperature: 0.7, // Sampling temperature
topK: 40, // Top-K sampling
topP: 0.95, // Top-P (nucleus) sampling
repeatPenalty: 1.2, // Penalty for repeated tokens
repeatLastN: 128, // Number of tokens to consider for repeat penalty
frequencyPenalty: 0.1, // Frequency-based penalty
presencePenalty: 0.1, // Presence-based penalty
mirostat: .v2(tau: 5.0, eta: 0.1) // Adaptive perplexity control
)

let response = try await session.respond(
to: "Write a story",
options: options
)
```

> [!NOTE]
Expand Down
79 changes: 61 additions & 18 deletions Sources/AnyLanguageModel/Models/LlamaLanguageModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ import Foundation

/// The context size for the model.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead:
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(contextSize: 4096)
Expand All @@ -204,57 +204,91 @@ import Foundation

/// The batch size for processing.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(batchSize: 1024)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var batchSize: UInt32 { legacyDefaults.batchSize }

/// The number of threads to use.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(threads: 8)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var threads: Int32 { legacyDefaults.threads }

/// The random seed for generation.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(seed: 42)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var seed: UInt32 { legacyDefaults.seed }

/// The temperature for sampling.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(temperature: 0.6)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var temperature: Float { legacyDefaults.temperature }

/// The top-K sampling parameter.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(topK: 25)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var topK: Int32 { legacyDefaults.topK }

/// The top-P (nucleus) sampling parameter.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(topP: 0.9)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var topP: Float { legacyDefaults.topP }

/// The repeat penalty for generation.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(repeatPenalty: 1.2)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var repeatPenalty: Float { legacyDefaults.repeatPenalty }

/// The number of tokens to consider for repeat penalty.
///
/// - Important: This property is deprecated. Use ``GenerationOptions`` with
/// custom options instead.
/// - Important: This property is deprecated.
/// Use ``GenerationOptions`` with custom options instead:
/// ```swift
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(repeatLastN: 128)
/// ```
@available(*, deprecated, message: "Use GenerationOptions custom options instead")
public var repeatLastN: Int32 { legacyDefaults.repeatLastN }

Expand Down Expand Up @@ -397,9 +431,18 @@ import Foundation

/// Creates a Llama language model using legacy parameter defaults.
///
/// - Important: This initializer is deprecated. Use
/// `init(modelPath:)` and configure per-request values via
/// - Important: This initializer is deprecated.
/// Use `init(modelPath:)` and configure per-request values via
/// ``GenerationOptions`` custom options instead.
///
/// ```swift
/// let model = LlamaLanguageModel(modelPath: "/path/to/model.gguf")
/// var options = GenerationOptions()
/// options[custom: LlamaLanguageModel.self] = .init(contextSize: 4096)
///
/// let session = LanguageModelSession(model: model)
/// let response = try await session.respond(to: "Hello, world!", options: options)
/// ```
@available(
*,
deprecated,
Expand Down