Skip to content

Commit dc530d6

Browse files
committed
chore: merge master into beta
1 parent 9d033a6 commit dc530d6

File tree

4 files changed: +29 −6 lines changed

llama/addon.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -559,6 +559,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
559559
return result;
560560
}
561561

562+
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
563+
llama_print_timings(ctx);
564+
llama_reset_timings(ctx);
565+
return info.Env().Undefined();
566+
}
567+
562568
static void init(Napi::Object exports) {
563569
exports.Set(
564570
"AddonContext",
@@ -576,6 +582,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
576582
InstanceMethod("sampleToken", &AddonContext::SampleToken),
577583
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
578584
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
585+
InstanceMethod("printTimings", &AddonContext::PrintTimings),
579586
InstanceMethod("dispose", &AddonContext::Dispose)
580587
}
581588
)

src/cli/commands/ChatCommand.ts

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,8 @@ type ChatCommand = {
4343
repeatPresencePenalty?: number,
4444
maxTokens: number,
4545
noHistory: boolean,
46-
environmentFunctions: boolean
46+
environmentFunctions: boolean,
47+
printTimings: boolean
4748
};
4849

4950
export const ChatCommand: CommandModule<object, ChatCommand> = {
@@ -197,20 +198,27 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
197198
default: false,
198199
description: "Provide access to environment functions like `getDate` and `getTime`",
199200
group: "Optional:"
201+
})
202+
.option("printTimings", {
203+
alias: "pt",
204+
type: "boolean",
205+
default: false,
206+
description: "Print llama.cpp timings after each response",
207+
group: "Optional:"
200208
});
201209
},
202210
async handler({
203211
model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
204212
grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
205213
gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
206214
repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory,
207-
environmentFunctions
215+
environmentFunctions, printTimings
208216
}) {
209217
try {
210218
await RunChat({
211219
model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
212220
topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
213-
repeatPresencePenalty, maxTokens, noHistory, environmentFunctions
221+
repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, printTimings
214222
});
215223
} catch (err) {
216224
console.error(err);
@@ -223,7 +231,8 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
223231
async function RunChat({
224232
model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg,
225233
jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
226-
penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions
234+
penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions,
235+
printTimings
227236
}: ChatCommand) {
228237
const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession/LlamaChatSession.js");
229238
const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
@@ -370,6 +379,9 @@ async function RunChat({
370379
});
371380
process.stdout.write(endColor);
372381
console.log();
382+
383+
if (printTimings)
384+
context.printTimings();
373385
}
374386
}
375387

src/llamaEvaluator/LlamaContext/LlamaContext.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,10 @@ export class LlamaContext {
320320
});
321321
}
322322

323+
public printTimings() {
324+
this._ctx.printTimings();
325+
}
326+
323327
/** @internal */
324328
public async _decodeTokens<T>({
325329
sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5

src/utils/getBin.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -174,8 +174,8 @@ export type AddonContext = {
174174
shiftSequenceTokenCells(sequenceId: number, startPos: number, endPos: number, shiftDelta: number): void,
175175

176176
acceptGrammarEvaluationStateToken(grammarEvaluationState: AddonGrammarEvaluationState, token: Token): void,
177-
178-
getEmbedding(): Float64Array
177+
getEmbedding(): Float64Array,
178+
printTimings(): void
179179
};
180180

181181
export type BatchLogitIndex = number & {

Comments (0)