1 file changed: +5 -1 lines changed
Columns: original file line number | diff line number | diff line change
@@ -1938,7 +1938,6 @@ struct server_context {
19381938 params_dft.n_ctx = params_base.speculative .n_ctx == 0 ? params_base.n_ctx / params_base.n_parallel : params_base.speculative .n_ctx ;
19391939 params_dft.n_gpu_layers = params_base.speculative .n_gpu_layers ;
19401940 params_dft.n_parallel = 1 ;
1941- params_dft.swa_full = true ; // TODO: this is not optimal and can be improved
19421941
19431942 // force F16 KV cache for the draft model for extra performance
19441943 params_dft.cache_type_k = GGML_TYPE_F16;
@@ -2017,6 +2016,11 @@ struct server_context {
20172016 params_base.n_cache_reuse = 0 ;
20182017 SRV_WRN (" %s\n " , " cache_reuse is not supported by this context, it will be disabled" );
20192018 }
2019+
2020+ if (!params_base.speculative .model .path .empty ()) {
2021+ SRV_ERR (" %s\n " , " err: speculative decode is not supported by this context" );
2022+ return false ;
2023+ }
20202024 }
20212025
20222026 return true ;
You can’t perform that action at this time.
0 commit comments