Commit c0f5bd8

feat(minor): add --printTimings option to the chat CLI command (#138)
1 parent 57d83a2

4 files changed: +26 -3 lines changed

llama/addon.cpp

Lines changed: 8 additions & 0 deletions
@@ -215,6 +215,13 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
     Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
         return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
     }
+
+    Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
+        llama_print_timings(ctx);
+        llama_reset_timings(ctx);
+        return info.Env().Undefined();
+    }
+
     Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
         int token = info[0].As<Napi::Number>().Int32Value();
         std::stringstream ss;
@@ -242,6 +249,7 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
             InstanceMethod("getContextSize", &LLAMAContext::GetContextSize),
             InstanceMethod("getTokenString", &LLAMAContext::GetTokenString),
             InstanceMethod("eval", &LLAMAContext::Eval),
+            InstanceMethod("printTimings", &LLAMAContext::PrintTimings),
         }));
     }
};
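
Note (not part of the diff): llama_print_timings writes llama.cpp's timing report (load, sample, prompt eval, and eval times) to stderr, and the follow-up llama_reset_timings zeroes those counters, so each call covers only the work done since the previous one. Because the report is printed rather than returned, the binding returns undefined.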

src/cli/commands/ChatCommand.ts

Lines changed: 13 additions & 3 deletions
@@ -21,6 +21,7 @@ const modelWrappers = ["auto", "general", "llamaChat", "chatML", "falconChat"] a
 type ChatCommand = {
     model: string,
     systemInfo: boolean,
+    printTimings: boolean,
     systemPrompt: string,
     prompt?: string,
     wrapper: (typeof modelWrappers)[number],
@@ -62,6 +63,12 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
             description: "Print llama.cpp system info",
             group: "Optional:"
         })
+        .option("printTimings", {
+            type: "boolean",
+            default: false,
+            description: "Print llama.cpp timings",
+            group: "Optional:"
+        })
         .option("systemPrompt", {
             alias: "s",
             type: "string",
@@ -191,13 +198,13 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
         model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
         grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
         gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
-        repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
+        repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings
     }) {
         try {
             await RunChat({
                 model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
                 topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
-                repeatPresencePenalty, maxTokens, noHistory
+                repeatPresencePenalty, maxTokens, noHistory, printTimings
             });
         } catch (err) {
             console.error(err);
@@ -210,7 +217,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
 async function RunChat({
     model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg,
     jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
-    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
+    penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings
 }: ChatCommand) {
     const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession.js");
     const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
@@ -340,6 +347,9 @@ async function RunChat({
         });
         process.stdout.write(endColor);
         console.log();
+
+        if (printTimings)
+            context.printTimings();
     }
 }
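
Note (not part of the diff): with the flag wired through the yargs options, the chat command now accepts --printTimings, for example node-llama-cpp chat --model <path-to-model> --printTimings (the exact invocation depends on how the CLI is installed). Since the check runs right after each response is written, the timing report is printed after every generated response, and the reset means each report covers only that response.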

src/llamaEvaluator/LlamaContext.ts

Lines changed: 4 additions & 0 deletions
@@ -201,6 +201,10 @@ export class LlamaContext {
         return this._ctx.getContextSize();
     }

+    public printTimings() {
+        this._ctx.printTimings();
+    }
+
     /**
      * @param {Uint32Array} tokens
      * @param {object} options
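
For programmatic use, here is a minimal sketch (not from this commit) of calling the new method through the public wrapper; it assumes the package's root exports and the constructor options used elsewhere in this repo, and the model path is a placeholder:

import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

// Placeholder model path; point this at any local GGUF model file.
const model = new LlamaModel({modelPath: "path/to/model.gguf"});
const context = new LlamaContext({model});
const session = new LlamaChatSession({context});

await session.prompt("Hi there");

// Prints llama.cpp's timing report for the work above to stderr,
// then resets the counters so the next report covers only later evaluation.
context.printTimings();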

src/utils/getBin.ts

Lines changed: 1 addition & 0 deletions
@@ -138,6 +138,7 @@ export type LLAMAContext = {
     tokenNl(): number,
     getContextSize(): number
     getTokenString(token: number): string
+    printTimings(): void
};

export type LLAMAGrammar = {
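
Note: this hand-maintained declaration describes the native addon's surface, so the new printTimings(): void entry is what lets the LlamaContext wrapper above call this._ctx.printTimings() without a type error.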
