@@ -12,14 +12,14 @@ highlight llama_hl_hint guifg=#ff772f
 highlight llama_hl_info guifg=#77ff2f
 
 let s:default_config = {
-    \ 'endpoint':    'http://127.0.0.1:8012/infill',
-    \ 'n_prefix':    128,
-    \ 'n_suffix':    128,
-    \ 'n_predict':   64,
-    \ 'n_probs':     3,
-    \ 'temperature': 0.1,
-    \ 'auto_fim':    v:true,
-    \ 'stop':        ["\n"]
+    \ 'endpoint':         'http://127.0.0.1:8012/infill',
+    \ 'n_prefix':         128,
+    \ 'n_suffix':         128,
+    \ 'n_predict':        64,
+    \ 't_max_prompt_ms':  300,
+    \ 't_max_predict_ms': 200,
+    \ 'auto_fim':         v:true,
+    \ 'stop':             ["\n"]
     \ }
 
 let g:llama_config = get(g:, 'llama_config', s:default_config)
@@ -48,6 +48,8 @@ function! llama#init()
         autocmd!
         autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
         autocmd InsertLeave * call llama#fim_cancel()
+
+        autocmd CursorMoved * call llama#fim_cancel()
     augroup END
 
     silent! call llama#fim_cancel()
@@ -85,19 +87,20 @@ function! llama#fim(is_auto) abort
         \ . "\n"
 
     let l:request = json_encode({
-        \ 'prompt':         "",
-        \ 'input_prefix':   l:prefix,
-        \ 'input_suffix':   l:suffix,
-       "\ 'stop':           g:llama_config.stop,
-        \ 'n_predict':      g:llama_config.n_predict,
-       "\ 'n_probs':        g:llama_config.n_probs,
-        \ 'penalty_last_n': 0,
-        \ 'temperature':    g:llama_config.temperature,
-        \ 'top_k':          5,
-        \ 'infill_p':       0.20,
-        \ 'infill_p_eog':   0.001,
-        \ 'stream':         v:false,
-        \ 'samplers':       ["top_k", "infill"]
+        \ 'prompt':           "",
+        \ 'input_prefix':     l:prefix,
+        \ 'input_suffix':     l:suffix,
+       "\ 'stop':             g:llama_config.stop,
+        \ 'n_predict':        g:llama_config.n_predict,
+        \ 'penalty_last_n':   0,
+        \ 'top_k':            5,
+        \ 'infill_p':         0.20,
+        \ 'infill_p_eog':     0.001,
+        \ 'stream':           v:false,
+        \ 'samplers':         ["top_k", "infill"],
+        \ 't_max_prompt_ms':  g:llama_config.t_max_prompt_ms,
+        \ 't_max_predict_ms': g:llama_config.t_max_predict_ms,
+        \ 'cache_prompt':     v:true
         \ })
 
     let l:curl_command = printf(
@@ -181,9 +184,9 @@ function! s:fim_on_stdout(job_id, data, event) dict
     let l:t_prompt_ms = 1.0
     let l:s_prompt    = 0
 
-    let l:n_gen     = 0
-    let l:t_gen_ms  = 1.0
-    let l:s_gen     = 0
+    let l:n_predict    = 0
+    let l:t_predict_ms = 1.0
+    let l:s_predict    = 0
 
     if s:can_accept && v:shell_error
         if !self.is_auto
@@ -221,9 +224,9 @@ function! s:fim_on_stdout(job_id, data, event) dict
             let l:t_prompt_ms = get(l:timings, 'prompt_ms', 1)
             let l:s_prompt    = get(l:timings, 'prompt_per_second', 0)
 
-            let l:n_gen      = get(l:timings, 'predicted_n', 0)
-            let l:t_gen_ms   = get(l:timings, 'predicted_ms', 1)
-            let l:s_gen      = get(l:timings, 'predicted_per_second', 0)
+            let l:n_predict    = get(l:timings, 'predicted_n', 0)
+            let l:t_predict_ms = get(l:timings, 'predicted_ms', 1)
+            let l:s_predict    = get(l:timings, 'predicted_per_second', 0)
         endif
     endif
 
@@ -256,8 +259,8 @@ function! s:fim_on_stdout(job_id, data, event) dict
 
         let l:info = printf("%s | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %f.2 ms",
             \ l:prefix,
-            \ l:n_prompt, l:t_prompt_ms, l:s_prompt,
-            \ l:n_gen, l:t_gen_ms, l:s_gen,
+            \ l:n_prompt,  l:t_prompt_ms,  l:s_prompt,
+            \ l:n_predict, l:t_predict_ms, l:s_predict,
             \ 1000.0 * reltimefloat(reltime(s:t_fim_start))
             \ )
 
0 commit comments