3838highlight llama_hl_hint guifg= #ff772f
3939highlight llama_hl_info guifg= #77 ff2f
4040
41- " endpoint: llama.cpp server endpoint
42- " n_prefix: number of lines before the cursor location to include in the prefix
43- " n_suffix: number of lines after the cursor location to include in the suffix
44- " n_predict: max number of tokens to predict
45- " t_max_prompt_ms: max alloted time for the prompt generation (TODO: not yet supported)
46- " t_max_predict_ms: max alloted time for the prediction
47- " show_info: show extra info about the inference
48- " auto_fim: trigger FIM completion automatically on cursor movement
41+ " general parameters:
42+ "
43+ " endpoint: llama.cpp server endpoint
44+ " n_prefix: number of lines before the cursor location to include in the prefix
45+ " n_suffix: number of lines after the cursor location to include in the suffix
46+ " n_predict: max number of tokens to predict
47+ " t_max_prompt_ms: max allotted time for the prompt generation (TODO: not yet supported)
48+ " t_max_predict_ms: max allotted time for the prediction
49+ " show_info: show extra info about the inference (0 - disabled, 1 - statusline, 2 - inline)
50+ " auto_fim: trigger FIM completion automatically on cursor movement
51+ "
52+ " ring buffer of chunks, accumulated over time upon:
53+ "
54+ " - completion request
55+ " - yank
56+ " - reading a file
57+ "
58+ " ring context parameters:
59+ "
60+ " ring_n_chunks: max number of chunks to pass as extra context to the server (0 to disable)
61+ " ring_chunk_size: max size of the chunks (in number of lines)
62+ " ring_scope: the range around the cursor position (in number of lines) for gathering chunks
63+ "
4964let s: default_config = {
5065 \ ' endpoint' : ' http://127.0.0.1:8012/infill' ,
51- \ ' n_prefix' : 256 ,
52- \ ' n_suffix' : 256 ,
66+ \ ' n_prefix' : 128 ,
67+ \ ' n_suffix' : 128 ,
5368 \ ' n_predict' : 64 ,
5469 \ ' t_max_prompt_ms' : 500 ,
5570 \ ' t_max_predict_ms' : 200 ,
56- \ ' show_info' : v: true ,
71+ \ ' show_info' : 2 ,
5772 \ ' auto_fim' : v: true ,
73+ \ ' ring_n_chunks' : 32 ,
74+ \ ' ring_chunk_size' : 64 ,
75+ \ ' ring_scope' : 1024 ,
5876 \ }
5977
6078let g: llama_config = get (g: , ' llama_config' , s: default_config )
6179
" Return a pseudo-random integer: i0 plus rand() taken modulo the size of the
" inclusive span (i1 - i0 + 1).
"
" i0: lower bound of the range
" i1: upper bound of the range
"
" For i0 <= i1 the result lies in [i0, i1], assuming rand() returns a
" non-negative Number (see :help rand()).
function! s:rand(i0, i1) abort
    let l:span   = a:i1 - a:i0 + 1
    let l:offset = rand() % l:span
    return a:i0 + l:offset
endfunction
83+
6284function ! llama#init ()
6385 if ! executable (' curl' )
6486 echohl WarningMsg
@@ -76,6 +98,9 @@ function! llama#init()
7698 let s: line_cur_prefix = ' '
7799 let s: line_cur_suffix = ' '
78100
101+ let s: ring_n_chunks = []
102+
103+ let s: pos_y_pick = -9999 " last y where we picked a chunk
79104 let s: pos_dx = 0
80105 let s: content = []
81106 let s: can_accept = v: false
@@ -91,12 +116,55 @@ function! llama#init()
91116 autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <C-O> :call llama#fim(v:false)<CR>
92117 autocmd InsertLeavePre * call llama#fim_cancel ()
93118
94- autocmd CursorMoved * call llama#fim_cancel ()
119+ autocmd CursorMoved * call llama#fim_cancel ()
120+
121+ autocmd TextYankPost * if v: event .operator == # ' y' | call s: pick_chunk (v: event .regcontents, v: false ) | endif
122+
123+ autocmd BufEnter * call timer_start (100 , {- > s: pick_chunk (getline (max ([1 , line (' .' ) - g: llama_config .ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/ 2 , line (' $' )])), v: true )})
95124 augroup END
96125
97126 silent ! call llama#fim_cancel ()
98127endfunction
99128
" Pick a chunk of lines from a:text and append it to the ring buffer
" (s:ring_n_chunks) unless an identical chunk is already stored.
"
" text:   list of lines to pick the chunk from
" no_mod: when true, do nothing if the current buffer is modified, unlisted or
"         not backed by a readable file
"
" A chunk holds at most g:llama_config.ring_chunk_size consecutive lines. When
" a:text is longer than that, a window of exactly ring_chunk_size lines is
" taken at a random offset. The ring holds at most
" g:llama_config.ring_n_chunks entries; the oldest entries are evicted first.
function! s:pick_chunk(text, no_mod)
    " do not pick chunks from buffers with pending changes or buffers that are not files
    if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
        return
    endif

    " the ring buffer is disabled
    if g:llama_config.ring_n_chunks <= 0
        return
    endif

    let l:chunk_size = g:llama_config.ring_chunk_size

    " nothing useful to store: no input lines, or a non-positive chunk size
    " (which would also make the slice bounds below meaningless)
    if empty(a:text) || l:chunk_size <= 0
        return
    endif

    if len(a:text) <= l:chunk_size
        " the whole text fits into a single chunk
        let l:chunk = join(a:text, "\n")
    else
        " list slices are inclusive on both ends, so the last index is
        " l0 + chunk_size - 1 to get exactly chunk_size lines
        let l:l0 = s:rand(0, len(a:text) - l:chunk_size)
        let l:l1 = l:l0 + l:chunk_size - 1

        let l:chunk = join(a:text[l:l0 : l:l1], "\n")
    endif

    " check if this chunk is already added
    " (index() matches case exactly, independent of the 'ignorecase' option)
    if index(s:ring_n_chunks, l:chunk) >= 0
        return
    endif

    " evict the oldest chunks until there is room for the new one; the loop
    " also shrinks the ring if ring_n_chunks was lowered at runtime
    while len(s:ring_n_chunks) >= g:llama_config.ring_n_chunks
        call remove(s:ring_n_chunks, 0)
    endwhile

    call add(s:ring_n_chunks, l:chunk)
endfunction
167+
100168function ! llama#fim (is_auto) abort
101169 let s: t_fim_start = reltime ()
102170
@@ -128,6 +196,20 @@ function! llama#fim(is_auto) abort
128196 \ . join (l: lines_suffix , " \n " )
129197 \ . " \n "
130198
199+ " TODO: per-file location
200+ let l: delta_y = abs (s: pos_y - s: pos_y_pick )
201+
202+ " only gather chunks if the cursor has moved a lot
203+ if a: is_auto && l: delta_y > 32
204+ " pick a prefix chunk
205+ call s: pick_chunk (getline (max ([1 , s: pos_y - g: llama_config .ring_scope]), max ([1 , s: pos_y - g: llama_config .n_prefix])), v: false )
206+
207+ " " pick a suffix chunk
208+ call s: pick_chunk (getline (min ([l: max_y , s: pos_y + g: llama_config .n_suffix]), min ([l: max_y , s: pos_y + g: llama_config .ring_scope])), v: false )
209+
210+ let s: pos_y_pick = s: pos_y
211+ endif
212+
131213 let l: request = json_encode ({
132214 \ ' prompt' : " " ,
133215 \ ' input_prefix' : l: prefix ,
@@ -137,7 +219,8 @@ function! llama#fim(is_auto) abort
137219 \ ' top_k' : 100 ,
138220 \ ' stream' : v: false ,
139221 \ ' samplers' : [" top_k" , " infill" ],
140- " \ 'cache_prompt': v:true,
222+ \ ' cache_prompt' : v: true ,
223+ \ ' extra_context' : s: ring_n_chunks ,
141224 \ ' t_max_prompt_ms' : g: llama_config .t_max_prompt_ms,
142225 \ ' t_max_predict_ms' : g: llama_config .t_max_predict_ms
143226 \ })
@@ -235,6 +318,7 @@ function! s:fim_auto()
235318 call jobstop (s: current_job )
236319 endif
237320
321+ " TODO: when job cancellation is implemented on the server, reduce these timeouts
238322 if reltimefloat (reltime (s: t_fim_last )) < 500 * 0.001
239323 if s: timer_fim != -1
240324 call timer_stop (s: timer_fim )
@@ -284,6 +368,11 @@ function! s:fim_on_stdout(job_id, data, event) dict
284368 call remove (s: content , -1 )
285369 endwhile
286370
371+ let l: generation_settings = get (l: response , ' generation_settings' , {})
372+ let l: n_ctx = get (l: generation_settings , ' n_ctx' , 0 )
373+
374+ let l: n_cached = get (l: response , ' tokens_cached' , 0 )
375+
287376 " if response.timings is available
288377 if len (get (l: response , ' timings' , {})) > 0
289378 let l: has_info = v: true
@@ -322,21 +411,26 @@ function! s:fim_on_stdout(job_id, data, event) dict
322411 let l: id_vt_info = nvim_create_namespace (' vt_info' )
323412
324413 " construct the info message and display it to the right of the current line
325- if g: llama_config .show_info && l: has_info
414+ if g: llama_config .show_info > 0 && l: has_info
326415 " prefix the info string with whitespace in order to offset it to the right of the fim overlay
327416 let l: prefix = repeat (' ' , len (s: content [0 ]) - len (s: line_cur_suffix ) + 3 )
328417
329- let l: info = printf (" %s | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %.2f ms" ,
330- \ l: prefix ,
418+ let l: info = printf (" %s | context: %d / %d | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %.2f ms" ,
419+ \ g: llama_config .show_info == 2 ? l: prefix : ' ' ,
420+ \ l: n_cached , l: n_ctx ,
331421 \ l: n_prompt , l: t_prompt_ms , l: s_prompt ,
332422 \ l: n_predict , l: t_predict_ms , l: s_predict ,
333423 \ 1000.0 * reltimefloat (reltime (s: t_fim_start ))
334424 \ )
335425
336- call nvim_buf_set_extmark (l: bufnr , l: id_vt_info , s: pos_y - 1 , s: pos_x - 1 , {
337- \ ' virt_text' : [[l: info , ' llama_hl_info' ]],
338- \ ' virt_text_pos' : ' eol' ,
339- \ })
426+ if g: llama_config .show_info == 1
427+ let &statusline = l: info
428+ elseif g: llama_config .show_info == 2
429+ call nvim_buf_set_extmark (l: bufnr , l: id_vt_info , s: pos_y - 1 , s: pos_x - 1 , {
430+ \ ' virt_text' : [[l: info , ' llama_hl_info' ]],
431+ \ ' virt_text_pos' : ' eol' ,
432+ \ })
433+ endif
340434 endif
341435
342436 " display the suggestion
0 commit comments