Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ struct common_params_speculative {

int32_t n_ctx = 0; // draft context size
int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
- int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding
+ int32_t n_min = 0; // minimum number of draft tokens to use for speculative decoding
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
float p_split = 0.1f; // speculative decoding split probability
- float p_min = 0.9f; // minimum speculative decoding probability (greedy)
+ float p_min = 0.75f; // minimum speculative decoding probability (greedy)

struct cpu_params cpuparams;
struct cpu_params cpuparams_batch;
Expand Down
2 changes: 1 addition & 1 deletion common/speculative.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ struct common_speculative_params {
int n_draft = 16; // max drafted tokens
int n_reuse = 256;

- float p_min = 0.9f; // min probability required to accept a token in the draft
+ float p_min = 0.75f; // min probability required to accept a token in the draft
};

struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);
Expand Down
Loading