Skip to content

Commit 965ad1c

Browse files
committed
speculative : update default params
1 parent 9626d93 commit 965ad1c

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

common/common.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -178,10 +178,10 @@ struct common_params_speculative {
178178

179179
int32_t n_ctx = 0; // draft context size
180180
int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
181-
int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding
181+
int32_t n_min = 0; // minimum number of draft tokens to use for speculative decoding
182182
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
183183
float p_split = 0.1f; // speculative decoding split probability
184-
float p_min = 0.9f; // minimum speculative decoding probability (greedy)
184+
float p_min = 0.75f; // minimum speculative decoding probability (greedy)
185185

186186
struct cpu_params cpuparams;
187187
struct cpu_params cpuparams_batch;

common/speculative.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ struct common_speculative_params {
99
int n_draft = 16; // max drafted tokens
1010
int n_reuse = 256;
1111

12-
float p_min = 0.9f; // min probability required to accept a token in the draft
12+
float p_min = 0.75f; // min probability required to accept a token in the draft
1313
};
1414

1515
struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);

0 commit comments

Comments
 (0)