Commit 47b476f

feat: minP support (#162)

1 parent: 46235a2

7 files changed: +56 -9 lines

llama/addon.cpp

Lines changed: 7 additions & 1 deletion
@@ -709,6 +709,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
     bool use_grammar = false;
     llama_token result;
     float temperature = 0.0f;
+    float min_p = 0;
     int32_t top_k = 40;
     float top_p = 0.95f;
     float repeat_penalty = 1.10f; // 1.0 = disabled
@@ -732,6 +733,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
         temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
     }

+    if (options.Has("minP")) {
+        min_p = options.Get("minP").As<Napi::Number>().FloatValue();
+    }
+
     if (options.Has("topK")) {
         top_k = options.Get("topK").As<Napi::Number>().Int32Value();
     }
@@ -833,6 +838,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
     llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
     llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
     llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
+    llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
     llama_sample_temp(ctx->ctx, &candidates_p, temperature);
     new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
 }
@@ -879,7 +885,7 @@ void addonCallJsLogCallback(
 ) {
     bool called = false;

-    if (env != nullptr && callback != nullptr) {
+    if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
         try {
             callback.Call({
                 Napi::Number::New(env, data->logLevelNumber),
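
For reference, `llama_sample_min_p` is llama.cpp's min-p sampler: it discards candidates whose probability falls below `min_p` times the probability of the most likely candidate, and with `min_p = 0` (the default above) it is effectively a no-op. In this chain it runs after top-K and top-P and before temperature scaling and the final draw. A standalone TypeScript sketch of that filtering rule, for illustration only (not code from this repository):

// Illustrative sketch of the min-p rule applied by llama_sample_min_p.
// Returns the indices of candidates whose probability is at least
// minP times the highest candidate probability.
function minPFilter(probabilities: number[], minP: number): number[] {
    if (minP <= 0)
        return probabilities.map((_, index) => index); // disabled: keep everything

    const threshold = Math.max(...probabilities) * minP;

    return probabilities
        .map((prob, index) => ({prob, index}))
        .filter(({prob}) => prob >= threshold)
        .map(({index}) => index);
}

// Example: with minP = 0.1 and a top candidate at probability 0.5,
// everything below 0.05 is dropped before sampling.
console.log(minPFilter([0.5, 0.3, 0.15, 0.04, 0.01], 0.1)); // [0, 1, 2]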

package.json

Lines changed: 1 addition & 0 deletions
@@ -89,6 +89,7 @@
     "function-calling",
     "embedding",
     "temperature",
+    "minP",
     "topK",
     "topP",
     "json-schema",

src/bindings/AddonTypes.ts

Lines changed: 1 addition & 0 deletions
@@ -68,6 +68,7 @@ export type AddonContext = {
     decodeBatch(): Promise<void>,
     sampleToken(batchLogitIndex: BatchLogitIndex, options?: {
         temperature?: number,
+        minP?: number,
         topK?: number,
         topP?: number,
         repeatPenalty?: number,
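
With this addition, the native `sampleToken` binding accepts `minP` alongside the existing sampling options. The object below is a hypothetical example of the second argument it now takes (values chosen purely for illustration):

// Hypothetical options object matching the sampleToken signature above.
const samplingOptions = {
    temperature: 0.8,
    minP: 0.05, // new in this commit; omitting it (or passing 0) leaves min-p disabled
    topK: 40,
    topP: 0.95
};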

src/cli/commands/ChatCommand.ts

Lines changed: 13 additions & 4 deletions
@@ -42,6 +42,7 @@ type ChatCommand = {
     jsonSchemaGrammarFile?: string,
     threads: number,
     temperature: number,
+    minP: number,
     topK: number,
     topP: number,
     gpuLayers?: number,
@@ -151,6 +152,13 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
             description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable.",
             group: "Optional:"
         })
+        .option("minP", {
+            alias: "mp",
+            type: "number",
+            default: 0,
+            description: "From the next token candidates, discard the percentage of tokens with the lowest probability. For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded. This is useful for generating more high-quality results when using a high temperature. Set to a value between `0` and `1` to enable. Only relevant when `temperature` is set to a value greater than `0`.",
+            group: "Optional:"
+        })
         .option("topK", {
             alias: "k",
             type: "number",
@@ -243,15 +251,15 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
     async handler({
         model, systemInfo, systemPrompt, systemPromptFile, prompt,
         promptFile, wrapper, contextSize, batchSize,
-        grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
-        gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
+        grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK,
+        topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
         repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory,
         environmentFunctions, noInfoLog, printTimings
     }) {
         try {
             await RunChat({
                 model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize,
-                grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty,
+                grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers, lastTokensRepeatPenalty,
                 repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
                 noHistory, environmentFunctions, noInfoLog, printTimings
             });
@@ -265,7 +273,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {

 async function RunChat({
     model: modelArg, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize,
-    grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers,
+    grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, gpuLayers,
     lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
     maxTokens, noHistory, environmentFunctions, noInfoLog, printTimings
 }: ChatCommand) {
@@ -425,6 +433,7 @@ async function RunChat({
         await session.prompt(input, {
             grammar: grammar as undefined, // this is a workaround to allow passing both `functions` and `grammar`
             temperature,
+            minP,
             topK,
             topP,
             repeatPenalty: {
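
In CLI terms, the new flag is meant to be combined with a nonzero temperature. A hypothetical invocation (the model path is a placeholder; `--model` and `--temperature` are pre-existing flags of the `chat` command, while `--minP` is added by this commit):

    npx node-llama-cpp chat --model ./models/model.gguf --temperature 0.8 --minP 0.05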

src/evaluator/LlamaChat/LlamaChat.ts

Lines changed: 13 additions & 1 deletion
@@ -45,6 +45,17 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
      */
     temperature?: number,

+    /**
+     * From the next token candidates, discard the percentage of tokens with the lowest probability.
+     * For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded.
+     * This is useful for generating more high-quality results when using a high temperature.
+     * Set to a value between `0` and `1` to enable.
+     *
+     * Only relevant when `temperature` is set to a value greater than `0`.
+     * Disabled by default.
+     */
+    minP?: number,
+
     /**
      * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
      * An integer number between `1` and the size of the vocabulary.
@@ -260,6 +271,7 @@ export class LlamaChat {
         signal,
         maxTokens,
         temperature,
+        minP,
         topK,
         topP,
         grammar,
@@ -535,7 +547,7 @@ export class LlamaChat {


         const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
-            temperature, topK, topP,
+            temperature, minP, topK, topP,
             grammarEvaluationState: () => {
                 if (inFunctionEvaluationMode)
                     return functionsEvaluationState;
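
The options are forwarded to the sequence through `removeNullFields`, which, going by its name, strips empty fields so that options left unset are not passed down as explicit values. A rough sketch of what such a helper presumably does, inferred from its name and usage here rather than taken from the repository's actual implementation:

// Assumed behaviour of a removeNullFields-style helper: drop null/undefined
// entries so omitted sampling options fall back to the native defaults.
function removeNullFields<T extends object>(obj: T): Partial<T> {
    return Object.fromEntries(
        Object.entries(obj).filter(([, value]) => value != null)
    ) as Partial<T>;
}

// Yields { temperature: 0.8, topK: 40 }; minP and topP are dropped entirely.
console.log(removeNullFields({temperature: 0.8, minP: undefined, topK: 40, topP: null}));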

src/evaluator/LlamaChatSession/LlamaChatSession.ts

Lines changed: 15 additions & 1 deletion
@@ -55,6 +55,17 @@ export type LLamaChatPromptOptions<Functions extends ChatSessionModelFunctions |
      */
     temperature?: number,

+    /**
+     * From the next token candidates, discard the percentage of tokens with the lowest probability.
+     * For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded.
+     * This is useful for generating more high-quality results when using a high temperature.
+     * Set to a value between `0` and `1` to enable.
+     *
+     * Only relevant when `temperature` is set to a value greater than `0`.
+     * Disabled by default.
+     */
+    minP?: number,
+
     /**
      * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
      * An integer number between `1` and the size of the vocabulary.
@@ -233,6 +244,7 @@ export class LlamaChatSession {
         signal,
         maxTokens,
         temperature,
+        minP,
         topK,
         topP,
         grammar,
@@ -244,7 +256,7 @@ export class LlamaChatSession {
             functions: functions as undefined,
             documentFunctionParams: documentFunctionParams as undefined,

-            onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix, repeatPenalty
+            onToken, signal, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix, repeatPenalty
         });

         return responseText;
@@ -261,6 +273,7 @@ export class LlamaChatSession {
         signal,
         maxTokens,
         temperature,
+        minP,
         topK,
         topP,
         grammar,
@@ -309,6 +322,7 @@ export class LlamaChatSession {
         onToken,
         signal,
         repeatPenalty,
+        minP,
         topK,
         topP,
         maxTokens,
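
At the user-facing level the option can now be set per prompt. A minimal sketch, assuming an already constructed `LlamaChatSession` (session construction is unchanged by this commit):

import type {LlamaChatSession} from "node-llama-cpp";

// Only the prompt call is shown; `session` is created the usual way elsewhere.
async function askWithMinP(session: LlamaChatSession) {
    return await session.prompt("Write a short poem about the sea", {
        temperature: 0.8, // minP is only relevant when temperature is greater than 0
        minP: 0.05,       // new option from this commit
        maxTokens: 128
    });
}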

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 6 additions & 2 deletions
@@ -610,6 +610,7 @@ export class LlamaContextSequence
      */
     public evaluate(tokens: Token[], {
         temperature = 0,
+        minP = 0,
         topK = 40,
         topP = 0.95,
         grammarEvaluationState,
@@ -621,7 +622,7 @@ export class LlamaContextSequence
         } = {},
         yieldEosToken = false
     }: {
-        temperature?: number, topK?: number, topP?: number,
+        temperature?: number, minP?: number, topK?: number, topP?: number,
         grammarEvaluationState?: LlamaGrammarEvaluationState | (() => LlamaGrammarEvaluationState | undefined),
         repeatPenalty?: LlamaContextSequenceRepeatPenalty,

@@ -648,6 +649,7 @@ export class LlamaContextSequence
     } = {}): AsyncGenerator<Token, void> {
         return this._evaluate(tokens, {
             temperature,
+            minP,
             topK,
             topP,
             grammarEvaluationState,
@@ -707,6 +709,7 @@ export class LlamaContextSequence
     /** @internal */
     private async *_evaluate(tokens: Token[], {
         temperature = 0,
+        minP = 0,
         topK = 40,
         topP = 0.95,
         grammarEvaluationState,
@@ -716,7 +719,7 @@ export class LlamaContextSequence
         contextShiftOptions,
         yieldEosToken = false
     }: {
-        temperature?: number, topK?: number, topP?: number,
+        temperature?: number, minP?: number, topK?: number, topP?: number,
         grammarEvaluationState?: LlamaGrammarEvaluationState | (() => LlamaGrammarEvaluationState | undefined),
         repeatPenalty?: LlamaContextSequenceRepeatPenalty, evaluationPriority?: EvaluationPriority,
         generateNewTokens?: boolean, contextShiftOptions: Required<ContextShiftOptions>, yieldEosToken?: boolean
@@ -752,6 +755,7 @@ export class LlamaContextSequence

         return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
             temperature,
+            minP,
             topK,
             topP,
             repeatPenalty: repeatPenalty?.penalty,
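
At the lowest TypeScript level the option flows through `LlamaContextSequence.evaluate` into the native `sampleToken` call. A minimal sketch of using it there directly (assuming the `LlamaContextSequence` and `Token` types are exported by the package and the prompt has already been tokenized; most users would go through `LlamaChatSession` instead):

import type {LlamaContextSequence, Token} from "node-llama-cpp";

// Low-level sketch: evaluate a tokenized prompt and sample with min-p enabled.
async function generateTokens(sequence: LlamaContextSequence, promptTokens: Token[]) {
    const generated: Token[] = [];

    for await (const token of sequence.evaluate(promptTokens, {
        temperature: 0.8,
        minP: 0.05, // new option; 0 (the default) keeps min-p disabled
        topK: 40,
        topP: 0.95
    })) {
        generated.push(token);

        if (generated.length >= 64) // stop after a fixed number of tokens
            break;
    }

    return generated;
}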
