File tree Expand file tree Collapse file tree 2 files changed +3
-3
lines changed Expand file tree Collapse file tree 2 files changed +3
-3
lines changed Original file line number Diff line number Diff line change @@ -178,10 +178,10 @@ struct common_params_speculative {
178178
179179 int32_t n_ctx = 0 ; // draft context size
180180 int32_t n_max = 16 ; // maximum number of tokens to draft during speculative decoding
181- int32_t n_min = 5 ; // minimum number of draft tokens to use for speculative decoding
181+ int32_t n_min = 0 ; // minimum number of draft tokens to use for speculative decoding
182182 int32_t n_gpu_layers = -1 ; // number of layers to store in VRAM for the draft model (-1 - use default)
183183 float p_split = 0.1f ; // speculative decoding split probability
184- float p_min = 0.9f ; // minimum speculative decoding probability (greedy)
184+ float p_min = 0.75f ; // minimum speculative decoding probability (greedy)
185185
186186 struct cpu_params cpuparams;
187187 struct cpu_params cpuparams_batch;
Original file line number Diff line number Diff line change @@ -9,7 +9,7 @@ struct common_speculative_params {
99 int n_draft = 16 ; // max drafted tokens
1010 int n_reuse = 256 ;
1111
12- float p_min = 0.9f ; // min probability required to accept a token in the draft
12+ float p_min = 0.75f ; // min probability required to accept a token in the draft
1313};
1414
1515struct common_speculative * common_speculative_init (struct llama_context * ctx_dft );
You can’t perform that action at this time.
0 commit comments