Skip to content

Commit 871aa69

Browse files
committed
llama.vim : add ring context from opened files and yanked text
1 parent c6afab9 commit 871aa69

File tree

1 file changed

+114
-20
lines changed

1 file changed

+114
-20
lines changed

examples/llama.vim

Lines changed: 114 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,27 +38,49 @@
3838
highlight llama_hl_hint guifg=#ff772f
3939
highlight llama_hl_info guifg=#77ff2f
4040

41-
" endpoint: llama.cpp server endpoint
42-
" n_prefix: number of lines before the cursor location to include in the prefix
43-
" n_suffix: number of lines after the cursor location to include in the suffix
44-
" n_predict: max number of tokens to predict
45-
" t_max_prompt_ms: max allotted time for the prompt generation (TODO: not yet supported)
46-
" t_max_predict_ms: max allotted time for the prediction
47-
" show_info: show extra info about the inference
48-
" auto_fim: trigger FIM completion automatically on cursor movement
41+
" general parameters:
42+
"
43+
" endpoint: llama.cpp server endpoint
44+
" n_prefix: number of lines before the cursor location to include in the prefix
45+
" n_suffix: number of lines after the cursor location to include in the suffix
46+
" n_predict: max number of tokens to predict
47+
" t_max_prompt_ms: max allotted time for the prompt generation (TODO: not yet supported)
48+
" t_max_predict_ms: max allotted time for the prediction
49+
" show_info: show extra info about the inference (0 - disabled, 1 - statusline, 2 - inline)
50+
" auto_fim: trigger FIM completion automatically on cursor movement
51+
"
52+
" ring buffer of chunks, accumulated over time upon:
53+
"
54+
" - completion request
55+
" - yank
56+
" - reading a file
57+
"
58+
" ring context parameters:
59+
"
60+
" ring_n_chunks: max number of chunks to pass as extra context to the server (0 to disable)
61+
" ring_chunk_size: max size of the chunks (in number of lines)
62+
" ring_scope: the range around the cursor position (in number of lines) for gathering chunks
63+
"
4964
let s:default_config = {
5065
\ 'endpoint': 'http://127.0.0.1:8012/infill',
51-
\ 'n_prefix': 256,
52-
\ 'n_suffix': 256,
66+
\ 'n_prefix': 128,
67+
\ 'n_suffix': 128,
5368
\ 'n_predict': 64,
5469
\ 't_max_prompt_ms': 500,
5570
\ 't_max_predict_ms': 200,
56-
\ 'show_info': v:true,
71+
\ 'show_info': 2,
5772
\ 'auto_fim': v:true,
73+
\ 'ring_n_chunks': 32,
74+
\ 'ring_chunk_size': 64,
75+
\ 'ring_scope': 1024,
5876
\ }
5977

6078
let g:llama_config = get(g:, 'llama_config', s:default_config)
6179

80+
function! s:rand(i0, i1) abort
81+
return a:i0 + rand() % (a:i1 - a:i0 + 1)
82+
endfunction
83+
6284
function! llama#init()
6385
if !executable('curl')
6486
echohl WarningMsg
@@ -76,6 +98,9 @@ function! llama#init()
7698
let s:line_cur_prefix = ''
7799
let s:line_cur_suffix = ''
78100

101+
let s:ring_n_chunks = []
102+
103+
let s:pos_y_pick = -9999 " last y where we picked a chunk
79104
let s:pos_dx = 0
80105
let s:content = []
81106
let s:can_accept = v:false
@@ -91,12 +116,55 @@ function! llama#init()
91116
autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
92117
autocmd InsertLeavePre * call llama#fim_cancel()
93118

94-
autocmd CursorMoved * call llama#fim_cancel()
119+
autocmd CursorMoved * call llama#fim_cancel()
120+
121+
autocmd TextYankPost * if v:event.operator ==# 'y' | call s:pick_chunk(v:event.regcontents, v:false) | endif
122+
123+
autocmd BufEnter * call timer_start(100, {-> s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true)})
95124
augroup END
96125

97126
silent! call llama#fim_cancel()
98127
endfunction
99128

129+
function! s:pick_chunk(text, no_mod)
130+
" do not pick chunks from buffers with pending changes or buffers that are not files
131+
if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
132+
return
133+
endif
134+
135+
if g:llama_config.ring_n_chunks <= 0
136+
return
137+
endif
138+
139+
if len(a:text) + 1 < g:llama_config.ring_chunk_size
140+
let l:chunk = join(a:text, "\n")
141+
else
142+
let l:l0 = s:rand(0, len(a:text) - g:llama_config.ring_chunk_size)
143+
let l:l1 = l:l0 + g:llama_config.ring_chunk_size
144+
145+
let l:chunk = join(a:text[l:l0:l:l1], "\n")
146+
endif
147+
148+
" check if this chunk is already added
149+
let l:exist = v:false
150+
for i in range(len(s:ring_n_chunks))
151+
if s:ring_n_chunks[i] == l:chunk
152+
let l:exist = v:true
153+
break
154+
endif
155+
endfor
156+
157+
if l:exist
158+
return
159+
endif
160+
161+
if len(s:ring_n_chunks) == g:llama_config.ring_n_chunks
162+
call remove(s:ring_n_chunks, 0)
163+
endif
164+
165+
call add(s:ring_n_chunks, l:chunk)
166+
endfunction
167+
100168
function! llama#fim(is_auto) abort
101169
let s:t_fim_start = reltime()
102170

@@ -128,6 +196,20 @@ function! llama#fim(is_auto) abort
128196
\ . join(l:lines_suffix, "\n")
129197
\ . "\n"
130198

199+
" TODO: per-file location
200+
let l:delta_y = abs(s:pos_y - s:pos_y_pick)
201+
202+
" only gather chunks if the cursor has moved a lot
203+
if a:is_auto && l:delta_y > 32
204+
" pick a prefix chunk
205+
call s:pick_chunk(getline(max([1, s:pos_y - g:llama_config.ring_scope]), max([1, s:pos_y - g:llama_config.n_prefix])), v:false)
206+
207+
" pick a suffix chunk
208+
call s:pick_chunk(getline(min([l:max_y, s:pos_y + g:llama_config.n_suffix]), min([l:max_y, s:pos_y + g:llama_config.ring_scope])), v:false)
209+
210+
let s:pos_y_pick = s:pos_y
211+
endif
212+
131213
let l:request = json_encode({
132214
\ 'prompt': "",
133215
\ 'input_prefix': l:prefix,
@@ -137,7 +219,8 @@ function! llama#fim(is_auto) abort
137219
\ 'top_k': 100,
138220
\ 'stream': v:false,
139221
\ 'samplers': ["top_k", "infill"],
140-
"\ 'cache_prompt': v:true,
222+
\ 'cache_prompt': v:true,
223+
\ 'extra_context': s:ring_n_chunks,
141224
\ 't_max_prompt_ms': g:llama_config.t_max_prompt_ms,
142225
\ 't_max_predict_ms': g:llama_config.t_max_predict_ms
143226
\ })
@@ -235,6 +318,7 @@ function! s:fim_auto()
235318
call jobstop(s:current_job)
236319
endif
237320

321+
" TODO: when job cancellation is implemented on the server, reduce these timeouts
238322
if reltimefloat(reltime(s:t_fim_last)) < 500*0.001
239323
if s:timer_fim != -1
240324
call timer_stop(s:timer_fim)
@@ -284,6 +368,11 @@ function! s:fim_on_stdout(job_id, data, event) dict
284368
call remove(s:content, -1)
285369
endwhile
286370

371+
let l:generation_settings = get(l:response, 'generation_settings', {})
372+
let l:n_ctx = get(l:generation_settings, 'n_ctx', 0)
373+
374+
let l:n_cached = get(l:response, 'tokens_cached', 0)
375+
287376
" if response.timings is available
288377
if len(get(l:response, 'timings', {})) > 0
289378
let l:has_info = v:true
@@ -322,21 +411,26 @@ function! s:fim_on_stdout(job_id, data, event) dict
322411
let l:id_vt_info = nvim_create_namespace('vt_info')
323412

324413
" construct the info message and display it to the right of the current line
325-
if g:llama_config.show_info && l:has_info
414+
if g:llama_config.show_info > 0 && l:has_info
326415
" prefix the info string with whitespace in order to offset it to the right of the fim overlay
327416
let l:prefix = repeat(' ', len(s:content[0]) - len(s:line_cur_suffix) + 3)
328417

329-
let l:info = printf("%s | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %.2f ms",
330-
\ l:prefix,
418+
let l:info = printf("%s | context: %d / %d | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %.2f ms",
419+
\ g:llama_config.show_info == 2 ? l:prefix : '',
420+
\ l:n_cached, l:n_ctx,
331421
\ l:n_prompt, l:t_prompt_ms, l:s_prompt,
332422
\ l:n_predict, l:t_predict_ms, l:s_predict,
333423
\ 1000.0 * reltimefloat(reltime(s:t_fim_start))
334424
\ )
335425

336-
call nvim_buf_set_extmark(l:bufnr, l:id_vt_info, s:pos_y - 1, s:pos_x - 1, {
337-
\ 'virt_text': [[l:info, 'llama_hl_info']],
338-
\ 'virt_text_pos': 'eol',
339-
\ })
426+
if g:llama_config.show_info == 1
427+
let &statusline = l:info
428+
elseif g:llama_config.show_info == 2
429+
call nvim_buf_set_extmark(l:bufnr, l:id_vt_info, s:pos_y - 1, s:pos_x - 1, {
430+
\ 'virt_text': [[l:info, 'llama_hl_info']],
431+
\ 'virt_text_pos': 'eol',
432+
\ })
433+
endif
340434
endif
341435

342436
" display the suggestion

0 commit comments

Comments
 (0)