@@ -43,7 +43,8 @@ type ChatCommand = {
     repeatPresencePenalty?: number,
     maxTokens: number,
     noHistory: boolean,
-    environmentFunctions: boolean
+    environmentFunctions: boolean,
+    printTimings: boolean
 };
 
 export const ChatCommand: CommandModule<object, ChatCommand> = {
@@ -197,20 +198,27 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
                 default: false,
                 description: "Provide access to environment functions like `getDate` and `getTime`",
                 group: "Optional:"
+            })
+            .option("printTimings", {
+                alias: "pt",
+                type: "boolean",
+                default: false,
+                description: "Print llama.cpp timings after each response",
+                group: "Optional:"
             });
     },
     async handler({
         model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
         grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
         gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
         repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory,
-        environmentFunctions
+        environmentFunctions, printTimings
     }) {
         try {
             await RunChat({
                 model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
                 topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
-                repeatPresencePenalty, maxTokens, noHistory, environmentFunctions
+                repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, printTimings
             });
         } catch (err) {
             console.error(err);
@@ -223,7 +231,8 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
 async function RunChat({
     model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg,
     jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
-    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions
+    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions,
+    printTimings
 }: ChatCommand) {
     const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession/LlamaChatSession.js");
     const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
@@ -370,6 +379,9 @@ async function RunChat({
         });
         process.stdout.write(endColor);
         console.log();
+
+        if (printTimings)
+            context.printTimings();
     }
 }
 
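For reference, a minimal sketch of what the new flag does through the library API, assuming the public `node-llama-cpp` package exports the same `LlamaModel`, `LlamaContext`, and `LlamaChatSession` classes imported above, and that `printTimings()` is exposed on `LlamaContext` as the call in `RunChat` suggests; the model path is a placeholder:

```typescript
import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

const model = new LlamaModel({modelPath: "./models/model.gguf"}); // placeholder path
const context = new LlamaContext({model});
const session = new LlamaChatSession({context});

const answer = await session.prompt("Hi there, how are you?");
console.log(answer);

// With `--printTimings` (alias `-pt`) set, the chat command makes this call
// after each response, printing llama.cpp's timing statistics for the context.
context.printTimings();
```

On the command line, the same behavior is enabled by passing `--printTimings` (or `-pt`) to the `chat` command; it defaults to `false`.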