Skip to content

Commit 26af4cd

Browse files
committed
llama.vim : add top_p + improve responsiveness + fix edge cases
1 parent e262dca commit 26af4cd

File tree

1 file changed

+38
-26
lines changed

1 file changed

+38
-26
lines changed

examples/llama.vim

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -66,16 +66,16 @@ highlight llama_hl_info guifg=#77ff2f
6666
"
6767
let s:default_config = {
6868
\ 'endpoint': 'http://127.0.0.1:8012/infill',
69-
\ 'n_prefix': 128,
70-
\ 'n_suffix': 128,
69+
\ 'n_prefix': 256,
70+
\ 'n_suffix': 8,
7171
\ 'n_predict': 64,
7272
\ 't_max_prompt_ms': 500,
73-
\ 't_max_predict_ms': 500,
73+
\ 't_max_predict_ms': 200,
7474
\ 'show_info': 2,
7575
\ 'auto_fim': v:true,
7676
\ 'max_line_suffix': 8,
77-
\ 'ring_n_chunks': 16,
78-
\ 'ring_chunk_size': 128,
77+
\ 'ring_n_chunks': 64,
78+
\ 'ring_chunk_size': 64,
7979
\ 'ring_scope': 1024,
8080
\ }
8181

@@ -110,13 +110,14 @@ function! llama#init()
110110
let s:content = []
111111
let s:can_accept = v:false
112112

113+
let s:timer_fim = -1
113114
let s:t_fim_start = reltime() " used to measure total FIM time
114115

115116
let s:current_job = v:null
116117

117118
augroup llama
118119
autocmd!
119-
autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <Esc>a
120+
autocmd InsertEnter * inoremap <expr> <silent> <C-F> llama#fim_inline(v:false, v:false)
120121
autocmd InsertLeavePre * call llama#fim_cancel()
121122

122123
autocmd CursorMoved * call llama#fim_cancel()
@@ -125,7 +126,7 @@ function! llama#init()
125126
if g:llama_config.auto_fim
126127
autocmd InsertEnter * call llama#fim(v:true, v:false)
127128
autocmd CursorMovedI * call llama#fim(v:true, v:false)
128-
autocmd CursorHoldI * call llama#fim(v:true, v:true)
129+
"autocmd CursorHoldI * call llama#fim(v:true, v:true)
129130
else
130131
autocmd CursorMovedI * call llama#fim_cancel()
131132
endif
@@ -202,7 +203,7 @@ function! s:pick_chunk(text, no_mod, do_evict)
202203

203204
" evict chunks that are very similar to the new one
204205
for i in range(len(s:ring_chunks) - 1, 0, -1)
205-
if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.9
206+
if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.5
206207
if a:do_evict
207208
call remove(s:ring_chunks, i)
208209
let s:ring_n_evict += 1
@@ -234,9 +235,10 @@ function! s:pick_chunk(text, no_mod, do_evict)
234235
\ 'input_suffix': "",
235236
\ 'n_predict': 1,
236237
\ 'penalty_last_n': 0,
237-
\ 'top_k': 100,
238+
\ 'top_k': 40,
239+
\ 'top_p': 0.99,
238240
\ 'stream': v:false,
239-
\ 'samplers': ["top_k", "infill"],
241+
\ 'samplers': ["top_k", "top_p", "infill"],
240242
\ 'cache_prompt': v:true,
241243
\ 'extra_context': l:extra_context,
242244
\ 't_max_prompt_ms': 1,
@@ -251,15 +253,27 @@ function! s:pick_chunk(text, no_mod, do_evict)
251253
call jobstart(l:curl_command, {})
252254
endfunction
253255

256+
function! llama#fim_inline(is_auto, on_hold) abort
257+
call llama#fim(a:is_auto, a:on_hold)
258+
return ''
259+
endfunction
260+
254261
function! llama#fim(is_auto, on_hold) abort
255-
if a:on_hold && s:hint_shown
262+
if a:on_hold && (s:hint_shown || (s:pos_x == col('.') - 1 && s:pos_y == line('.')))
256263
return
257264
endif
258265

259266
call llama#fim_cancel()
260267

261-
if reltimefloat(reltime(s:t_fim_start)) < 0.5
268+
" avoid sending repeated requests too fast
269+
if reltimefloat(reltime(s:t_fim_start)) < 0.6
270+
if s:timer_fim != -1
271+
call timer_stop(s:timer_fim)
272+
let s:timer_fim = -1
273+
endif
274+
262275
let s:t_fim_start = reltime()
276+
let s:timer_fim = timer_start(600, {-> llama#fim(v:true, v:true)})
263277
return
264278
endif
265279

@@ -287,6 +301,8 @@ function! llama#fim(is_auto, on_hold) abort
287301
let l:prefix = ""
288302
\ . join(l:lines_prefix, "\n")
289303
\ . "\n"
304+
305+
let l:prompt = ""
290306
\ . s:line_cur_prefix
291307

292308
let l:suffix = ""
@@ -306,14 +322,15 @@ function! llama#fim(is_auto, on_hold) abort
306322
endfor
307323

308324
let l:request = json_encode({
309-
\ 'prompt': "",
310325
\ 'input_prefix': l:prefix,
326+
\ 'prompt': l:prompt,
311327
\ 'input_suffix': l:suffix,
312328
\ 'n_predict': g:llama_config.n_predict,
313329
\ 'penalty_last_n': 0,
314-
\ 'top_k': 100,
330+
\ 'top_k': 40,
331+
\ 'top_p': 0.99,
315332
\ 'stream': v:false,
316-
\ 'samplers': ["top_k", "infill"],
333+
\ 'samplers': ["top_k", "top_p", "infill"],
317334
\ 'cache_prompt': v:true,
318335
\ 'extra_context': l:extra_context,
319336
\ 't_max_prompt_ms': g:llama_config.t_max_prompt_ms,
@@ -343,13 +360,10 @@ function! llama#fim(is_auto, on_hold) abort
343360
let l:delta_y = abs(s:pos_y - s:pos_y_pick)
344361

345362
" only gather chunks if the cursor has moved a lot
363+
" TODO: something more clever? reranking?
346364
if a:is_auto && l:delta_y > 32
347-
" randomly pick a prefix or a suffix chunk
348-
if s:rand(0, 1)
349-
call s:pick_chunk(getline(max([1, s:pos_y - g:llama_config.ring_scope]), max([1, s:pos_y - g:llama_config.n_prefix])), v:false, v:false)
350-
else
351-
call s:pick_chunk(getline(min([l:max_y, s:pos_y + g:llama_config.n_suffix]), min([l:max_y, s:pos_y + g:llama_config.ring_scope])), v:false, v:false)
352-
endif
365+
call s:pick_chunk(getline(max([1, s:pos_y - g:llama_config.ring_scope]), max([1, s:pos_y - g:llama_config.n_prefix])), v:false, v:false)
366+
call s:pick_chunk(getline(min([l:max_y, s:pos_y + g:llama_config.n_suffix]), min([l:max_y, s:pos_y + g:llama_config.n_suffix + g:llama_config.ring_chunk_size])), v:false, v:false)
353367

354368
let s:pos_y_pick = s:pos_y
355369
endif
@@ -367,7 +381,7 @@ function! llama#fim_accept(first_line)
367381
endif
368382

369383
" move the cursor to the end of the accepted text
370-
if !a:first_line
384+
if !a:first_line && len(s:content) > 1
371385
call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
372386
else
373387
call cursor(s:pos_y, s:pos_x + len(s:content[0]))
@@ -462,9 +476,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
462476
endif
463477

464478
if len(s:content) == 0
465-
if !self.is_auto
466-
call add(s:content, "<| EOT |>")
467-
endif
479+
call add(s:content, "")
468480
let s:can_accept = v:false
469481
endif
470482

@@ -475,7 +487,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
475487
let s:pos_dx = len(s:content[-1])
476488
let s:content[-1] .= s:line_cur_suffix
477489

478-
" truncate the suggestion if it repeats the next line
490+
" truncate the suggestion if it repeats the following lines
479491
if len(s:content) > 1 && s:content[1] == getline(s:pos_y + 1)
480492
let s:content = [s:content[0]]
481493
endif

0 commit comments

Comments
 (0)