@@ -66,16 +66,16 @@ highlight llama_hl_info guifg=#77ff2f
6666"
" Default settings dictionary for the plugin.
" This listing is a diff: lines marked '-' carry the previous value of a key
" and lines marked '+' carry its replacement; unmarked lines are unchanged.
" Usage visible elsewhere in this listing (read through g:llama_config):
"   n_prefix, n_suffix, ring_scope, ring_chunk_size - used as line offsets
"       relative to the cursor row when choosing ranges for s:pick_chunk()
"   n_predict, t_max_prompt_ms - forwarded in the FIM request body
"   auto_fim - selects which autocmds llama#init() registers
" NOTE(review): how s:default_config is combined with g:llama_config is not
" visible here - confirm against the rest of the file.
6767let s: default_config = {
6868 \ ' endpoint' : ' http://127.0.0.1:8012/infill' ,
69- \ ' n_prefix' : 128 ,
70- \ ' n_suffix' : 128 ,
69+ \ ' n_prefix' : 256 ,
70+ \ ' n_suffix' : 8 ,
7171 \ ' n_predict' : 64 ,
7272 \ ' t_max_prompt_ms' : 500 ,
73- \ ' t_max_predict_ms' : 500 ,
73+ \ ' t_max_predict_ms' : 200 ,
7474 \ ' show_info' : 2 ,
7575 \ ' auto_fim' : v: true ,
7676 \ ' max_line_suffix' : 8 ,
77- \ ' ring_n_chunks' : 16 ,
78- \ ' ring_chunk_size' : 128 ,
77+ \ ' ring_n_chunks' : 64 ,
78+ \ ' ring_chunk_size' : 64 ,
7979 \ ' ring_scope' : 1024 ,
8080 \ }
8181
@@ -110,13 +110,14 @@ function! llama#init()
110110 let s: content = []
111111 let s: can_accept = v: false
112112
113+ let s: timer_fim = -1
113114 let s: t_fim_start = reltime () " used to measure total FIM time
114115
115116 let s: current_job = v: null
116117
117118 augroup llama
118119 autocmd !
119- autocmd InsertEnter * inoremap <buffer > <silent> <C-F> <Esc> a
120+ autocmd InsertEnter * inoremap <expr > <silent> <C-F> llama#fim_inline(v:false, v:false)
120121 autocmd InsertLeavePre * call llama#fim_cancel ()
121122
122123 autocmd CursorMoved * call llama#fim_cancel ()
@@ -125,7 +126,7 @@ function! llama#init()
125126 if g: llama_config .auto_fim
126127 autocmd InsertEnter * call llama#fim (v: true , v: false )
127128 autocmd CursorMovedI * call llama#fim (v: true , v: false )
128- autocmd CursorHoldI * call llama#fim (v: true , v: true )
129+ " autocmd CursorHoldI * call llama#fim(v:true, v:true)
129130 else
130131 autocmd CursorMovedI * call llama#fim_cancel ()
131132 endif
@@ -202,7 +203,7 @@ function! s:pick_chunk(text, no_mod, do_evict)
202203
203204 " evict chunks that are very similar to the new one
204205 for i in range (len (s: ring_chunks ) - 1 , 0 , -1 )
205- if s: chunk_sim (s: ring_chunks [i ].data, l: chunk ) > 0.9
206+ if s: chunk_sim (s: ring_chunks [i ].data, l: chunk ) > 0.5
206207 if a: do_evict
207208 call remove (s: ring_chunks , i )
208209 let s: ring_n_evict += 1
@@ -234,9 +235,10 @@ function! s:pick_chunk(text, no_mod, do_evict)
234235 \ ' input_suffix' : " " ,
235236 \ ' n_predict' : 1 ,
236237 \ ' penalty_last_n' : 0 ,
237- \ ' top_k' : 100 ,
238+ \ ' top_k' : 40 ,
239+ \ ' top_p' : 0.99 ,
238240 \ ' stream' : v: false ,
239- \ ' samplers' : [" top_k" , " infill" ],
241+ \ ' samplers' : [" top_k" , " top_p " , " infill" ],
240242 \ ' cache_prompt' : v: true ,
241243 \ ' extra_context' : l: extra_context ,
242244 \ ' t_max_prompt_ms' : 1 ,
@@ -251,15 +253,27 @@ function! s:pick_chunk(text, no_mod, do_evict)
251253 call jobstart (l: curl_command , {})
252254endfunction
253255
" Wrapper around llama#fim() intended for the <expr> insert-mode mapping set
" up in llama#init(): it triggers a FIM request and returns a string that the
" mapping uses as its expansion.
"   is_auto, on_hold - forwarded unchanged to llama#fim()
" NOTE(review): whitespace in this listing appears altered by extraction; the
" returned literal is presumably the empty string so that the mapping types
" nothing into the buffer - confirm against the real file.
256+ function ! llama#fim_inline (is_auto, on_hold) abort
257+ call llama#fim (a: is_auto , a: on_hold )
258+ return ' '
259+ endfunction
260+
254261function ! llama#fim (is_auto, on_hold) abort
255- if a: on_hold && s: hint_shown
262+ if a: on_hold && ( s: hint_shown || ( s: pos_x == col ( ' . ' ) - 1 && s: pos_y == line ( ' . ' )))
256263 return
257264 endif
258265
259266 call llama#fim_cancel ()
260267
261- if reltimefloat (reltime (s: t_fim_start )) < 0.5
268+ " avoid sending repeated requests too fast
269+ if reltimefloat (reltime (s: t_fim_start )) < 0.6
270+ if s: timer_fim != -1
271+ call timer_stop (s: timer_fim )
272+ let s: timer_fim = -1
273+ endif
274+
262275 let s: t_fim_start = reltime ()
276+ let s: timer_fim = timer_start (600 , {- > llama#fim (v: true , v: true )})
263277 return
264278 endif
265279
@@ -287,6 +301,8 @@ function! llama#fim(is_auto, on_hold) abort
287301 let l: prefix = " "
288302 \ . join (l: lines_prefix , " \n " )
289303 \ . " \n "
304+
305+ let l: prompt = " "
290306 \ . s: line_cur_prefix
291307
292308 let l: suffix = " "
@@ -306,14 +322,15 @@ function! llama#fim(is_auto, on_hold) abort
306322 endfor
307323
308324 let l: request = json_encode ({
309- \ ' prompt' : " " ,
310325 \ ' input_prefix' : l: prefix ,
326+ \ ' prompt' : l: prompt ,
311327 \ ' input_suffix' : l: suffix ,
312328 \ ' n_predict' : g: llama_config .n_predict,
313329 \ ' penalty_last_n' : 0 ,
314- \ ' top_k' : 100 ,
330+ \ ' top_k' : 40 ,
331+ \ ' top_p' : 0.99 ,
315332 \ ' stream' : v: false ,
316- \ ' samplers' : [" top_k" , " infill" ],
333+ \ ' samplers' : [" top_k" , " top_p " , " infill" ],
317334 \ ' cache_prompt' : v: true ,
318335 \ ' extra_context' : l: extra_context ,
319336 \ ' t_max_prompt_ms' : g: llama_config .t_max_prompt_ms,
@@ -343,13 +360,10 @@ function! llama#fim(is_auto, on_hold) abort
343360 let l: delta_y = abs (s: pos_y - s: pos_y_pick )
344361
345362 " only gather chunks if the cursor has moved a lot
363+ " TODO: something more clever? reranking?
346364 if a: is_auto && l: delta_y > 32
347- " randomly pick a prefix or a suffix chunk
348- if s: rand (0 , 1 )
349- call s: pick_chunk (getline (max ([1 , s: pos_y - g: llama_config .ring_scope]), max ([1 , s: pos_y - g: llama_config .n_prefix])), v: false , v: false )
350- else
351- call s: pick_chunk (getline (min ([l: max_y , s: pos_y + g: llama_config .n_suffix]), min ([l: max_y , s: pos_y + g: llama_config .ring_scope])), v: false , v: false )
352- endif
365+ call s: pick_chunk (getline (max ([1 , s: pos_y - g: llama_config .ring_scope]), max ([1 , s: pos_y - g: llama_config .n_prefix])), v: false , v: false )
366+ call s: pick_chunk (getline (min ([l: max_y , s: pos_y + g: llama_config .n_suffix]), min ([l: max_y , s: pos_y + g: llama_config .n_suffix + g: llama_config .ring_chunk_size])), v: false , v: false )
353367
354368 let s: pos_y_pick = s: pos_y
355369 endif
@@ -367,7 +381,7 @@ function! llama#fim_accept(first_line)
367381 endif
368382
369383 " move the cursor to the end of the accepted text
370- if ! a: first_line
384+ if ! a: first_line && len ( s: content ) > 1
371385 call cursor (s: pos_y + len (s: content ) - 1 , s: pos_x + s: pos_dx )
372386 else
373387 call cursor (s: pos_y , s: pos_x + len (s: content [0 ]))
@@ -462,9 +476,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
462476 endif
463477
464478 if len (s: content ) == 0
465- if ! self .is_auto
466- call add (s: content , " <| EOT |>" )
467- endif
479+ call add (s: content , " " )
468480 let s: can_accept = v: false
469481 endif
470482
@@ -475,7 +487,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
475487 let s: pos_dx = len (s: content [-1 ])
476488 let s: content [-1 ] .= s: line_cur_suffix
477489
478- " truncate the suggestion if it repeats the next line
490+ " truncate the suggestion if it repeats the following lines
479491 if len (s: content ) > 1 && s: content [1 ] == getline (s: pos_y + 1 )
480492 let s: content = [s: content [0 ]]
481493 endif
0 commit comments