@@ -21,6 +21,7 @@ const modelWrappers = ["auto", "general", "llamaChat", "chatML", "falconChat"] a
 type ChatCommand = {
     model: string,
     systemInfo: boolean,
+    printTimings: boolean,
     systemPrompt: string,
     prompt?: string,
     wrapper: (typeof modelWrappers)[number],
@@ -62,6 +63,12 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
             description: "Print llama.cpp system info",
             group: "Optional:"
         })
+        .option("printTimings", {
+            type: "boolean",
+            default: false,
+            description: "Print llama.cpp timings",
+            group: "Optional:"
+        })
         .option("systemPrompt", {
             alias: "s",
             type: "string",
@@ -191,13 +198,13 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
         model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
         grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
         gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
-        repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
+        repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings
     }) {
         try {
             await RunChat({
                 model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
                 topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
-                repeatPresencePenalty, maxTokens, noHistory
+                repeatPresencePenalty, maxTokens, noHistory, printTimings
             });
         } catch (err) {
             console.error(err);
@@ -210,7 +217,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
 async function RunChat({
     model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg,
     jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
-    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
+    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings
 }: ChatCommand) {
     const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession.js");
     const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
@@ -340,6 +347,9 @@ async function RunChat({
         });
         process.stdout.write(endColor);
         console.log();
+
+        if (printTimings)
+            context.printTimings();
     }
 }
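For reference, a rough sketch of how the behavior behind the new `--printTimings` CLI flag could be reproduced when using the library directly, assuming the `printTimings()` method called on the context above is also exposed on the package's public `LlamaContext` class (the model path below is a placeholder):

import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

// Load a model and create a context and chat session (node-llama-cpp v2-style API).
const model = new LlamaModel({modelPath: "./models/model.gguf"});
const context = new LlamaContext({model});
const session = new LlamaChatSession({context});

const answer = await session.prompt("Hi there, how are you?");
console.log(answer);

// Mirrors what `--printTimings` does in the chat command: print llama.cpp's
// timing statistics for the evaluation that just ran.
context.printTimings();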