@@ -37,6 +37,7 @@ func newComposeCmd() *cobra.Command {
3737func newUpCommand () * cobra.Command {
3838 var models []string
3939 var ctxSize int64
40+ var rawRuntimeFlags string
4041 var backend string
4142 var draftModel string
4243 var numTokens int
@@ -69,6 +70,9 @@ func newUpCommand() *cobra.Command {
6970 if ctxSize > 0 {
7071 sendInfo (fmt .Sprintf ("Setting context size to %d" , ctxSize ))
7172 }
73+ if rawRuntimeFlags != "" {
74+ sendInfo ("Setting raw runtime flags to " + rawRuntimeFlags )
75+ }
7276
7377 // Build speculative config if any speculative flags are set
7478 var speculativeConfig * inference.SpeculativeDecodingConfig
@@ -89,10 +93,11 @@ func newUpCommand() *cobra.Command {
8993 ContextSize : & size ,
9094 Speculative : speculativeConfig ,
9195 },
96+ RawRuntimeFlags : rawRuntimeFlags ,
9297 }); err != nil {
93- configErrFmtString := "failed to configure backend for model %s with context-size %d"
94- _ = sendErrorf (configErrFmtString + ": %v" , model , ctxSize , err )
95- return fmt .Errorf (configErrFmtString + ": %w" , model , ctxSize , err )
98+ configErrFmtString := "failed to configure backend for model %s with context-size %d and runtime-flags %s"
99+ _ = sendErrorf (configErrFmtString + ": %v" , model , ctxSize , rawRuntimeFlags , err )
100+ return fmt .Errorf (configErrFmtString + ": %w" , model , ctxSize , rawRuntimeFlags , err )
96101 }
97102 sendInfo ("Successfully configured backend for model " + model )
98103 }
@@ -114,6 +119,7 @@ func newUpCommand() *cobra.Command {
114119 }
115120 c .Flags ().StringArrayVar (& models , "model" , nil , "model to use" )
116121 c .Flags ().Int64Var (& ctxSize , "context-size" , - 1 , "context size for the model" )
122+ c .Flags ().StringVar (& rawRuntimeFlags , "runtime-flags" , "" , "raw runtime flags to pass to the inference engine" )
117123 c .Flags ().StringVar (& backend , "backend" , llamacpp .Name , "inference backend to use" )
118124 c .Flags ().StringVar (& draftModel , "speculative-draft-model" , "" , "draft model for speculative decoding" )
119125 c .Flags ().IntVar (& numTokens , "speculative-num-tokens" , 0 , "number of tokens to predict speculatively" )
0 commit comments