@@ -17,6 +17,7 @@ func newConfigureCmd() *cobra.Command {
1717 var numTokens int
1818 var minAcceptanceRate float64
1919 var hfOverrides string
20+ var contextSize int64
2021 var reasoningBudget int64
2122
2223 c := & cobra.Command {
@@ -34,6 +35,10 @@ func newConfigureCmd() *cobra.Command {
3435 return nil
3536 },
3637 RunE : func (cmd * cobra.Command , args []string ) error {
38+ if cmd .Flags ().Changed ("context-size" ) {
39+ v := int32 (contextSize )
40+ opts .ContextSize = & v
41+ }
3742 // Build the speculative config if any speculative flags are set
3843 if draftModel != "" || numTokens > 0 || minAcceptanceRate > 0 {
3944 opts .Speculative = & inference.SpeculativeDecodingConfig {
@@ -64,14 +69,15 @@ func newConfigureCmd() *cobra.Command {
6469 if opts .LlamaCpp == nil {
6570 opts .LlamaCpp = & inference.LlamaCppConfig {}
6671 }
67- opts .LlamaCpp .ReasoningBudget = & reasoningBudget
72+ v := int32 (reasoningBudget )
73+ opts .LlamaCpp .ReasoningBudget = & v
6874 }
6975 return desktopClient .ConfigureBackend (opts )
7076 },
7177 ValidArgsFunction : completion .ModelNames (getDesktopClient , - 1 ),
7278 }
7379
74- c .Flags ().Int64Var (& opts . ContextSize , "context-size" , - 1 , "context size (in tokens)" )
80+ c .Flags ().Int64Var (& contextSize , "context-size" , 0 , "context size (in tokens)" )
7581 c .Flags ().StringVar (& draftModel , "speculative-draft-model" , "" , "draft model for speculative decoding" )
7682 c .Flags ().IntVar (& numTokens , "speculative-num-tokens" , 0 , "number of tokens to predict speculatively" )
7783 c .Flags ().Float64Var (& minAcceptanceRate , "speculative-min-acceptance-rate" , 0 , "minimum acceptance rate for speculative decoding" )
0 commit comments