@@ -307,9 +307,9 @@ function! s:ring_update()
307307        \  ' samplers' " temperature" 
308308        \  ' cache_prompt' v: true
309309        \  ' t_max_prompt_ms' 1 ,
310-         \  ' t_max_predict_ms' 1 
310+         \  ' t_max_predict_ms' 1 ,
311+         \  ' response_fields' " " 
311312        \  })
312- 
313313    let  l: curl_command=  [
314314        \  " curl" 
315315        \  " --silent" 
@@ -420,7 +420,20 @@ function! llama#fim(is_auto, cache) abort
420420        \  ' samplers' " top_k" " top_p" " infill" 
421421        \  ' cache_prompt' v: true
422422        \  ' t_max_prompt_ms' g: llama_config
423-         \  ' t_max_predict_ms' g: llama_config
423+         \  ' t_max_predict_ms' g: llama_config
424+         \  ' response_fields' 
425+         \                        " content" 
426+         \                        " timings/prompt_n" 
427+         \                        " timings/prompt_ms" 
428+         \                        " timings/prompt_per_token_ms" 
429+         \                        " timings/prompt_per_second" 
430+         \                        " timings/predicted_n" 
431+         \                        " timings/predicted_ms" 
432+         \                        " timings/predicted_per_token_ms" 
433+         \                        " timings/predicted_per_second" 
434+         \                        " truncated" 
435+         \                        " tokens_cached" 
436+         \                      ],
424437        \  })
425438
426439    let  l: curl_command=  [
@@ -662,24 +675,21 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
662675            call  remove (s: content-1 )
663676        endwhile 
664677
665-         let  l: generation_settings=  get (l: response' generation_settings' 
666-         let  l: n_ctx=  get (l: generation_settings' n_ctx' 0 )
667- 
668-         let  l: n_cached=  get (l: response' tokens_cached' 0 )
669-         let  l: truncated=  get (l: response' truncated' v: false
678+         let  l: n_cached=  get (l: response' timings/tokens_cached' 0 )
679+         let  l: truncated=  get (l: response' timings/truncated' v: false
670680
671681        "  if response.timings is available 
672-         if  len (get (l: response' timings'  >  0 
682+         if  has_key (l: response' timings/prompt_n' &&  has_key (l: response' timings/prompt_ms' &&  has_key (l: response' timings/prompt_per_second' 
683+             \  &&  has_key (l: response' timings/predicted_n' &&  has_key (l: response' timings/predicted_ms' &&  has_key (l: response' timings/predicted_per_second' 
673684            let  l: has_info=  v: true
674-             let  l: timings=  get (l: response' timings' 
675685
676-             let  l: n_prompt=  get (l: timings ' prompt_n' 0 )
677-             let  l: t_prompt_ms=  get (l: timings ' prompt_ms' 1 )
678-             let  l: s_prompt=  get (l: timings ' prompt_per_second' 0 )
686+             let  l: n_prompt=  get (l: response ' timings/ prompt_n' 0 )
687+             let  l: t_prompt_ms=  get (l: response ' timings/ prompt_ms' 1 )
688+             let  l: s_prompt=  get (l: response ' timings/ prompt_per_second' 0 )
679689
680-             let  l: n_predict=  get (l: timings ' predicted_n' 0 )
681-             let  l: t_predict_ms=  get (l: timings ' predicted_ms' 1 )
682-             let  l: s_predict=  get (l: timings ' predicted_per_second' 0 )
690+             let  l: n_predict=  get (l: response ' timings/ predicted_n' 0 )
691+             let  l: t_predict_ms=  get (l: response ' timings/ predicted_ms' 1 )
692+             let  l: s_predict=  get (l: response ' timings/ predicted_per_second' 0 )
683693        endif 
684694
685695        "  if response was pulled from cache 
@@ -772,9 +782,9 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
772782        let  l: prefix=  '    ' 
773783
774784        if  l: truncated
775-             let  l: info=  printf (" %s | WARNING: the context is full: %d / %d , increase the server context size or reduce g:llama_config.ring_n_chunks" 
785+             let  l: info=  printf (" %s | WARNING: the context is full: %d, increase the server context size or reduce g:llama_config.ring_n_chunks" 
776786                \  g: llama_config==  2  ? l: prefix' llama.vim' 
777-                 \  l: n_cached,  l: n_ctx 
787+                 \  l: n_cached
778788                \  )
779789        elseif  l: is_cached
780790            let  l: info=  printf (" %s | C: %d / %d, | t: %.2f ms" 
@@ -783,9 +793,9 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
783793                \  1000.0  *  reltimefloat (reltime (s: t_fim_start
784794                \  )
785795        else 
786-             let  l: info=  printf (" %s | c: %d / %d , r: %d / %d, e: %d, q: %d / 16 | p: %d (%.2f ms, %.2f t/s) | g: %d (%.2f ms, %.2f t/s) | t: %.2f ms" 
796+             let  l: info=  printf (" %s | c: %d, r: %d / %d, e: %d, q: %d / 16 | p: %d (%.2f ms, %.2f t/s) | g: %d (%.2f ms, %.2f t/s) | t: %.2f ms" 
787797                \  g: llama_config==  2  ? l: prefix' llama.vim' 
788-                 \  l: n_cachedl: n_ctx ,  len (s: ring_chunksg: llama_configs: ring_n_evictlen (s: ring_queued
798+                 \  l: n_cachedlen (s: ring_chunksg: llama_configs: ring_n_evictlen (s: ring_queued
789799                \  l: n_promptl: t_prompt_msl: s_prompt
790800                \  l: n_predictl: t_predict_msl: s_predict
791801                \  1000.0  *  reltimefloat (reltime (s: t_fim_start
0 commit comments