@@ -21,6 +21,7 @@ const modelWrappers = ["auto", "general", "llamaChat", "chatML", "falconChat"] a
 type ChatCommand = {
     model: string,
     systemInfo: boolean,
+    printTimings: boolean,
     systemPrompt: string,
     prompt?: string,
     wrapper: (typeof modelWrappers)[number],
@@ -62,6 +63,12 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
             description: "Print llama.cpp system info",
             group: "Optional:"
         })
+        .option("printTimings", {
+            type: "boolean",
+            default: false,
+            description: "Print llama.cpp timings",
+            group: "Optional:"
+        })
         .option("systemPrompt", {
             alias: "s",
             type: "string",
@@ -191,13 +198,13 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
         model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
         grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
         gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
-        repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
+        repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings
     }) {
         try {
             await RunChat({
                 model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
                 topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
-                repeatPresencePenalty, maxTokens, noHistory
+                repeatPresencePenalty, maxTokens, noHistory, printTimings
             });
         } catch (err) {
             console.error(err);
@@ -210,7 +217,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
 async function RunChat({
     model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg,
     jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
-    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
+    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings
 }: ChatCommand) {
     const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession.js");
     const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
@@ -340,6 +347,9 @@ async function RunChat({
         });
         process.stdout.write(endColor);
         console.log();
+
+        if (printTimings)
+            context.printTimings();
     }
 }
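
A usage sketch of the new flag, assuming the package's `chat` CLI command and a placeholder model path:

    npx node-llama-cpp chat --model <path-to-model> --printTimings

With the flag set, `context.printTimings()` runs after each generated response, printing llama.cpp's timing report; it defaults to false, so existing invocations are unaffected.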
345355