1+ " LLM-based text completion using llama.cpp
2+ "
3+ " requires:
4+ "
5+ " - neovim
6+ " - curl
7+ " - llama.cpp server instance
8+ " - FIM-compatible model
9+ "
110" sample config:
211"
3- " - Ctrl+F - trigger FIM completion manually
12+ " - Tab - accept the current suggestion
13+ " - Shift+Tab - accept just the first line of the segguestion
14+ " - Ctrl+F - trigger FIM completion manually
15+ "
16+ " make symlink or copy this file to ~/.config/nvim/autoload/llama.vim
17+ "
18+ " start the llama.cpp server with a FIM-compatible model. for example:
19+ "
20+ " $ llama-server -m {model.gguf} --port 8012 -ngl 99 -fa --ubatch-size 1024 --batch-size 2048
21+ "
22+ " --batch-size [512, model max context]
23+ "
24+ " adjust the batch size to control how much of the provided context will be used during the inference
25+ " lower values will use smaller part of the context around the cursor, which will result in faster processing
426"
5- " run this once to initialise the plugin:
27+ " --ubatch-size [64, 2048]
628"
7- " :call llama#init()
29+ " chunks the batch into smaller chunks for faster processing
30+ " depends on the specific hardware. use llama-bench to profile and determine the best size
31+ "
32+ " run this once to initialise llama.vim:
33+ "
34+ " :call llama#init()
835"
936
1037" color of the suggested text
1138highlight llama_hl_hint guifg= #ff772f
1239highlight llama_hl_info guifg= #77 ff2f
1340
41+ " endpoint: llama.cpp server endpoint
42+ " n_prefix: number of lines to include in the prefix
43+ " n_suffix: number of lines to include in the suffix
44+ " n_predict: max number of tokens to predict
45+ " t_max_prompt_ms: max alloted time for the text generation
46+ " show_info: show extra info about the inference
47+ " auto_fim: trigger FIM completion automatically on cursor movement
 let s:default_config = {
     \ 'endpoint':         'http://127.0.0.1:8012/infill',
-    \ 'n_prefix':         128,
-    \ 'n_suffix':         128,
+    \ 'n_prefix':         256,
+    \ 'n_suffix':         256,
     \ 'n_predict':        64,
-    \ 't_max_prompt_ms':  300,
+    \ 't_max_prompt_ms':  500,
     \ 't_max_predict_ms': 200,
+    \ 'show_info':        v:true,
     \ 'auto_fim':         v:true,
-    \ 'stop':             ["\n"]
     \ }
 
 let g:llama_config = get(g:, 'llama_config', s:default_config)
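+
+" note: a user-defined g:llama_config replaces s:default_config entirely (see the
+" get() call above), so a custom config should set all of the keys. for example,
+" to keep the defaults but disable automatic FIM (values mirror s:default_config):
+"
+"   let g:llama_config = {
+"       \ 'endpoint':         'http://127.0.0.1:8012/infill',
+"       \ 'n_prefix':         256,
+"       \ 'n_suffix':         256,
+"       \ 'n_predict':        64,
+"       \ 't_max_prompt_ms':  500,
+"       \ 't_max_predict_ms': 200,
+"       \ 'show_info':        v:true,
+"       \ 'auto_fim':         v:false,
+"       \ }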
 
 function! llama#init()
-    let s:pos_x = 0
+    if !executable('curl')
+        echohl WarningMsg
+        echo 'llama.vim requires the "curl" command to be available'
+        echohl None
+        return
+    endif
+
+    let s:pos_x = 0 " cursor position upon start of completion
     let s:pos_y = 0
     let s:pos_x0 = 0 " pos_x corrected for end-of-line edge case
 
@@ -46,8 +87,8 @@ function! llama#init()
 
     augroup llama
         autocmd!
-        autocmd InsertEnter * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
-        autocmd InsertLeave * call llama#fim_cancel()
+        autocmd InsertEnter    * inoremap <buffer> <silent> <C-F> <C-O>:call llama#fim(v:false)<CR>
+        autocmd InsertLeavePre * call llama#fim_cancel()
 
         autocmd CursorMoved * call llama#fim_cancel()
     augroup END
@@ -90,7 +131,6 @@ function! llama#fim(is_auto) abort
         \ 'prompt':         "",
         \ 'input_prefix':   l:prefix,
         \ 'input_suffix':   l:suffix,
-        "\ 'stop':          g:llama_config.stop,
         \ 'n_predict':      g:llama_config.n_predict,
         \ 'penalty_last_n': 0,
         \ 'top_k':          100,
@@ -126,16 +166,23 @@ function! llama#fim(is_auto) abort
     endif
 endfunction
 
-function! llama#fim_accept()
+" if first_line == v:true accept only the first line of the response
+function! llama#fim_accept(first_line)
     " insert the suggestion at the cursor location
     if s:can_accept && len(s:content) > 0
         call setline(s:pos_y, s:line_cur[:(s:pos_x0 - 1)] . s:content[0])
         if len(s:content) > 1
-            call append(s:pos_y, s:content[1:-1])
+            if !a:first_line
+                call append(s:pos_y, s:content[1:-1])
+            endif
         endif
 
         " move the cursor to the end of the accepted text
-        call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
+        if !a:first_line
+            call cursor(s:pos_y + len(s:content) - 1, s:pos_x + s:pos_dx)
+        else
+            call cursor(s:pos_y, s:pos_x + len(s:content[0]) - 1)
+        endif
     endif
 
     call llama#fim_cancel()
@@ -146,6 +193,11 @@ function! llama#fim_cancel()
         call jobstop(s:current_job)
     endif
 
+    if s:timer_fim != -1
+        call timer_stop(s:timer_fim)
+        let s:timer_fim = -1
+    endif
+
     " clear the virtual text
     let l:bufnr = bufnr('%')
 
@@ -155,7 +207,9 @@ function! llama#fim_cancel()
     call nvim_buf_clear_namespace(l:bufnr, l:id_vt_fim, 0, -1)
     call nvim_buf_clear_namespace(l:bufnr, l:id_vt_info, 0, -1)
 
+    " remove the mappings
     silent! iunmap <buffer> <Tab>
+    silent! iunmap <buffer> <S-Tab>
     silent! iunmap <buffer> <Esc>
 
     augroup llama_insert
@@ -173,6 +227,8 @@ function! s:fim_auto_enable()
     augroup END
 endfunction
 
+" auto-start a fim job a short time after the cursor has moved
+" if there is already a job queued - cancel it
 function! s:fim_auto()
     if s:current_job != v:null
         call jobstop(s:current_job)
@@ -189,7 +245,7 @@ function! s:fim_auto()
     let s:timer_fim = timer_start(500, {-> llama#fim(v:true)})
 endfunction
 
-
+" callback that processes the result from the server
 function! s:fim_on_stdout(job_id, data, event) dict
     let l:raw = join(a:data, "\n")
     if len(l:raw) == 0
@@ -199,6 +255,13 @@ function! s:fim_on_stdout(job_id, data, event) dict
     let s:can_accept = v:true
     let l:has_info = v:false
 
+    if s:can_accept && v:shell_error
+        if !self.is_auto
+            call add(s:content, "<| curl error: is the server on? |>")
+        endif
+        let s:can_accept = v:false
+    endif
+
     let l:n_prompt = 0
     let l:t_prompt_ms = 1.0
     let l:s_prompt = 0
@@ -207,13 +270,6 @@ function! s:fim_on_stdout(job_id, data, event) dict
     let l:t_predict_ms = 1.0
     let l:s_predict = 0
 
-    if s:can_accept && v:shell_error
-        if !self.is_auto
-            call add(s:content, "<| curl error: is the server on? |>")
-        endif
-        let s:can_accept = v:false
-    endif
-
     " get the generated suggestion
     if s:can_accept
         let l:response = json_decode(l:raw)
@@ -227,7 +283,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
             call remove(s:content, -1)
         endwhile
 
-        " if response.timings
+        " if response.timings is available
         if len(get(l:response, 'timings', {})) > 0
             let l:has_info = v:true
             let l:timings = get(l:response, 'timings', {})
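+
+            " note: the timings fields read from this object assume llama-server's
+            " response format, roughly { "prompt_n", "prompt_ms", "predicted_n", "predicted_ms", ... }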
@@ -264,8 +320,8 @@ function! s:fim_on_stdout(job_id, data, event) dict
     let l:id_vt_fim = nvim_create_namespace('vt_fim')
     let l:id_vt_info = nvim_create_namespace('vt_info')
 
-    " construct the info message:
-    if l:has_info
+    " construct the info message and display it to the right of the current line
+    if g:llama_config.show_info && l:has_info
         " prefix the info string with whitespace in order to offset it to the right of the fim overlay
         let l:prefix = repeat(' ', len(s:content[0]) - len(s:line_cur_suffix) + 3)
271327
@@ -282,6 +338,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
282338 \ })
283339 endif
284340
341+ " display the suggestion
285342 call nvim_buf_set_extmark (l: bufnr , l: id_vt_fim , s: pos_y - 1 , s: pos_x - 1 , {
286343 \ ' virt_text' : [[s: content [0 ], ' llama_hl_hint' ]],
287344 \ ' virt_text_win_col' : virtcol (' .' ) - 1
@@ -293,8 +350,8 @@ function! s:fim_on_stdout(job_id, data, event) dict
         \ })
 
     " setup accept/cancel events
-    inoremap <buffer> <Tab> <C-O>:call llama#fim_accept()<CR>
-    inoremap <buffer> <Esc> <C-O>:call llama#fim_cancel()<CR><Esc>
+    inoremap <buffer> <Tab>   <C-O>:call llama#fim_accept(v:false)<CR>
+    inoremap <buffer> <S-Tab> <C-O>:call llama#fim_accept(v:true)<CR>
 
     augroup llama_insert
         autocmd!