1 file changed: +2 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -157,8 +157,8 @@ struct common_params_sampling {
157157
158158struct common_params_speculative {
159159 int32_t n_ctx = 4096 ; // draft context size
160- int32_t n_max = 5 ; // maximum number of tokens to draft during speculative decoding
161- int32_t n_min = 0 ; // minimum number of draft tokens to use for speculative decoding
160+ int32_t n_max = 16 ; // maximum number of tokens to draft during speculative decoding
161+ int32_t n_min = 5 ; // minimum number of draft tokens to use for speculative decoding
162162 int32_t n_gpu_layers = -1 ; // number of layers to store in VRAM for the draft model (-1 - use default)
163163 float p_split = 0.1f ; // speculative decoding split probability
164164 float p_min = 0.9f ; // minimum speculative decoding probability (greedy)
You can’t perform that action at this time.
0 commit comments