@@ -364,18 +364,46 @@ Enable the trait in Package.swift:
 )
 ```

-Custom generation options for llama.cpp provide fine-grained
-control over sampling parameters:
+Custom generation options give you per-request control
+over llama.cpp runtime parameters:

 ```swift
 var options = GenerationOptions(temperature: 0.8)
 options[custom: LlamaLanguageModel.self] = .init(
-    repeatPenalty: 1.2,
-    repeatLastN: 128,
-    frequencyPenalty: 0.1,
-    presencePenalty: 0.1,
+    contextSize: 4096,     // Context window size
+    batchSize: 512,        // Batch size for evaluation
+    threads: 8,            // Number of threads
+    seed: 42,              // Random seed for deterministic output
+    temperature: 0.7,      // Sampling temperature
+    topK: 40,              // Top-K sampling
+    topP: 0.95,            // Top-P (nucleus) sampling
+    repeatPenalty: 1.2,    // Penalty for repeated tokens
+    repeatLastN: 128,      // Number of tokens to consider for repeat penalty
+    frequencyPenalty: 0.1, // Frequency-based penalty
+    presencePenalty: 0.1,  // Presence-based penalty
     mirostat: .v2(tau: 5.0, eta: 0.1) // Adaptive perplexity control
 )
+
+let response = try await session.respond(
+    to: "Write a story",
+    options: options
+)
 ```
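+
+Because options are passed per request, different calls can
+use different settings. A minimal sketch, assuming omitted
+initializer parameters fall back to defaults:
+
+```swift
+var fastOptions = GenerationOptions(temperature: 0.2)
+fastOptions[custom: LlamaLanguageModel.self] = .init(
+    threads: 4, // Hypothetical lighter settings for a short request
+    topK: 20
+)
+let summary = try await session.respond(
+    to: "Summarize the story in one sentence",
+    options: fastOptions
+)
+```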

 > [!NOTE]