1111
1212struct common_speculative {
1313 struct llama_context * ctx;
14+
1415 struct common_sampler * smpl;
16+ struct common_sampler * smpl_infill;
1517
1618 llama_batch batch;
1719 llama_tokens prompt;
@@ -20,49 +22,48 @@ struct common_speculative {
// common_speculative_init, shown as a unified-diff view: the leading digits are old/new
// line numbers, '-' marks lines removed by the change and '+' marks lines added by it.
//
// Allocates a common_speculative with `new`, storing ctx_dft as .ctx and a batch created by
// llama_batch_init(llama_n_batch(ctx_dft), 0, 1), then builds the samplers below and returns
// the new object.
//
// Before the change: the top-k 40 / top-p 0.9 / INFILL configuration sat under `#if 0`, so only
// the `#else` block (top-k = 10) created result->smpl.
// After the change: the preprocessor guards are gone and both blocks always run, filling
// result->smpl (top-k = 10) and the new result->smpl_infill (top-k 40, top-p 0.9, INFILL).
2022struct common_speculative * common_speculative_init (
2123 struct llama_context * ctx_dft) {
2224 auto * result = new common_speculative {
23- /* .ctx = */ ctx_dft,
24- /* .smpl = */ nullptr ,
25- /* .batch = */ llama_batch_init (llama_n_batch (ctx_dft), 0 , 1 ),
26- /* .prompt = */ {},
25+ /* .ctx = */ ctx_dft,
26+ /* .smpl = */ nullptr ,
27+ /* .smpl_infill = */ nullptr ,
28+ /* .batch = */ llama_batch_init (llama_n_batch (ctx_dft), 0 , 1 ),
29+ /* .prompt = */ {},
2730 };
2831
29- // TODO: optimize or pass from outside?
30- #if 0
// Post-change: this block configures the sampler stored in result->smpl (top-k only, k = 10).
3132 {
3233 common_params_sampling params;
3334 params.no_perf = false ;
3435
35- params.top_k = 40;
36- params.top_p = 0.9;
36+ params.top_k = 10 ;
3737
3838 params.samplers = {
3939 COMMON_SAMPLER_TYPE_TOP_K,
40- COMMON_SAMPLER_TYPE_TOP_P,
41- COMMON_SAMPLER_TYPE_INFILL,
4240 };
4341
4442 result->smpl = common_sampler_init (llama_get_model (ctx_dft), params);
4543 }
46- # else
44+
// Post-change: this block configures the sampler stored in result->smpl_infill
// (sampler types listed in order: TOP_K, TOP_P, INFILL; top_k = 40, top_p = 0.9).
4745 {
4846 common_params_sampling params;
4947 params.no_perf = false ;
5048
51- params.top_k = 10 ;
49+ params.top_k = 40 ;
50+ params.top_p = 0.9 ;
5251
5352 params.samplers = {
5453 COMMON_SAMPLER_TYPE_TOP_K,
54+ COMMON_SAMPLER_TYPE_TOP_P,
55+ COMMON_SAMPLER_TYPE_INFILL,
5556 };
5657
57- result->smpl = common_sampler_init (llama_get_model (ctx_dft), params);
58+ result->smpl_infill = common_sampler_init (llama_get_model (ctx_dft), params);
5859 }
59- #endif
6060
6161 return result;
6262}
6363
6464void common_speculative_free (struct common_speculative * spec) {
6565 common_sampler_free (spec->smpl );
66+ common_sampler_free (spec->smpl_infill );
6667
6768 llama_batch_free (spec->batch );
6869
@@ -133,7 +134,7 @@ llama_tokens common_speculative_gen_draft(
133134 llama_token id_last) {
134135 auto & batch = spec->batch ;
135136 auto & ctx = spec->ctx ;
136- auto & smpl = spec->smpl ;
137+ auto & smpl = params. infill ? spec-> smpl_infill : spec->smpl ;
137138 auto & prompt = spec->prompt ;
138139
139140 int reuse_i = 0 ;
0 commit comments