@@ -14,6 +14,7 @@ import (
1414 "os/exec"
1515 "path/filepath"
1616 "runtime"
17+ "strconv"
1718 "strings"
1819 "sync"
1920 "time"
@@ -99,7 +100,34 @@ func findFreePort() (int, error) {
99100 return l .Addr ().(* net.TCPAddr ).Port , nil
100101}
101102
102- func newLlamaEngine (modelPath , modelName string , verbose bool , mmproj ... string ) (* llamaEngine , error ) {
// llamaReadyTimeout returns how long to wait for llama-server /health after start.
// Large GGUF files can take many minutes to mmap / load to GPU.
//
// The timeout is resolved in this order:
//
//  1. CSGHUB_LITE_LLAMA_READY_TIMEOUT, when it holds a positive Go duration
//     ("45m", "90s") or a positive whole number of seconds ("300"). The value
//     is used as given and is not capped; a seconds count too large for
//     time.Duration saturates at the maximum duration instead of wrapping.
//     Empty, non-positive, or unparsable values are ignored.
//  2. 20 minutes when modelPath cannot be stat'ed.
//  3. A size-based estimate: 2 minutes plus ~1 minute per GiB of the file at
//     modelPath, truncated to whole seconds and kept within [2m, 45m].
func llamaReadyTimeout(modelPath string) time.Duration {
	const (
		minWaitSeconds = 120     // 2 min base
		maxWaitSeconds = 45 * 60 // upper bound for the size-based estimate
		statFailedWait = 20 * time.Minute
		bytesPerGiB    = 1 << 30

		maxDuration = time.Duration(1<<63 - 1)
		// Largest whole-second count that still fits in a time.Duration.
		maxDurationSeconds = int64(maxDuration / time.Second)
	)

	if v := strings.TrimSpace(os.Getenv("CSGHUB_LITE_LLAMA_READY_TIMEOUT")); v != "" {
		if d, err := time.ParseDuration(v); err == nil && d > 0 {
			return d
		}
		if sec, err := strconv.Atoi(v); err == nil && sec > 0 {
			// Multiplying an oversized count by time.Second would wrap int64
			// and could yield a negative (already expired) timeout.
			if int64(sec) > maxDurationSeconds {
				return maxDuration
			}
			return time.Duration(sec) * time.Second
		}
	}

	fi, err := os.Stat(modelPath)
	if err != nil {
		return statFailedWait
	}

	gib := float64(fi.Size()) / bytesPerGiB
	// 2 min base + ~1 min per GiB (F16 9B is ~17GiB on disk → ~19 min).
	secs := minWaitSeconds + gib*60
	// Clamp while still in floating point so an extreme size can never hit an
	// out-of-range float→int conversion. The lower bound is defensive: Size is
	// only well-defined for regular files.
	if secs < minWaitSeconds {
		secs = minWaitSeconds
	}
	if secs > maxWaitSeconds {
		secs = maxWaitSeconds
	}
	return time.Duration(int64(secs)) * time.Second
}
129+
130+ func newLlamaEngine (modelPath , modelName string , verbose bool , progress ConvertProgressFunc , mmproj ... string ) (* llamaEngine , error ) {
103131 binary := findLlamaBinary ()
104132 if binary == "" {
105133 return nil , fmt .Errorf ("llama-server not found in PATH.\n " +
@@ -141,7 +169,8 @@ func newLlamaEngine(modelPath, modelName string, verbose bool, mmproj ...string)
141169 engine .cmd .Stdout = os .Stderr
142170 engine .cmd .Stderr = os .Stderr
143171 } else {
144- w := newCappedWriter (8192 )
172+ // Large models print long tensor/KV lists; keep more tail for error diagnosis.
173+ w := newCappedWriter (64 * 1024 )
145174 engine .cmd .Stdout = w
146175 engine .cmd .Stderr = w
147176 engine .logBuf = w
@@ -164,15 +193,19 @@ func newLlamaEngine(modelPath, modelName string, verbose bool, mmproj ...string)
164193 return nil , fmt .Errorf ("starting llama-server: %w" , err )
165194 }
166195
167- if err := engine .waitForReady (30 * time .Second ); err != nil {
196+ readyTimeout := llamaReadyTimeout (modelPath )
197+ if progress != nil {
198+ progress ("Starting llama-server" , 0 , 0 )
199+ }
200+ if err := engine .waitForReady (readyTimeout , progress ); err != nil {
168201 engine .Close ()
169202 return nil , fmt .Errorf ("llama-server failed to start: %w" , err )
170203 }
171204
172205 return engine , nil
173206}
174207
175- func (e * llamaEngine ) waitForReady (timeout time.Duration ) error {
208+ func (e * llamaEngine ) waitForReady (timeout time.Duration , progress ConvertProgressFunc ) error {
176209 deadline := time .Now ().Add (timeout )
177210 url := fmt .Sprintf ("http://127.0.0.1:%d/health" , e .port )
178211
@@ -182,7 +215,15 @@ func (e *llamaEngine) waitForReady(timeout time.Duration) error {
182215 exited := make (chan error , 1 )
183216 go func () { exited <- e .cmd .Wait () }()
184217
218+ start := time .Now ()
219+ lastBeat := time.Time {}
220+
185221 for time .Now ().Before (deadline ) {
222+ if progress != nil && time .Since (lastBeat ) >= 2 * time .Second {
223+ progress ("Loading model with llama-server" , int (time .Since (start ).Seconds ()), 0 )
224+ lastBeat = time .Now ()
225+ }
226+
186227 select {
187228 case err := <- exited :
188229 msg := "llama-server exited unexpectedly"
@@ -209,7 +250,7 @@ func (e *llamaEngine) waitForReady(timeout time.Duration) error {
209250 time .Sleep (500 * time .Millisecond )
210251 }
211252
212- msg := "timeout waiting for llama-server to be ready"
253+ msg := fmt . Sprintf ( "timeout waiting for llama-server to be ready (waited %v; large models need more time — try CSGHUB_LITE_LLAMA_READY_TIMEOUT=45m)" , timeout )
213254 if e .logBuf != nil {
214255 if tail := strings .TrimSpace (e .logBuf .String ()); tail != "" {
215256 msg += "\n \n llama-server output:\n " + tail
0 commit comments