@@ -33,7 +33,7 @@
 "
 "   :call llama#init()
 "
-" more info: https://github.com/ggerganov/llama.cpp/pull/9787/files
+" more info: https://github.com/ggerganov/llama.cpp/pull/9787
 "
 
 " colors (adjust to your liking)
@@ -46,7 +46,7 @@ highlight llama_hl_info guifg=#77ff2f
 " n_prefix:         number of lines before the cursor location to include in the prefix
 " n_suffix:         number of lines after the cursor location to include in the suffix
 " n_predict:        max number of tokens to predict
-" t_max_prompt_ms:  max alloted time for the prompt generation (TODO: not yet supported)
+" t_max_prompt_ms:  max alloted time for the prompt processing (TODO: not yet supported)
 " t_max_predict_ms: max alloted time for the prediction
 " show_info:        show extra info about the inference (0 - disabled, 1 - statusline, 2 - inline)
 " auto_fim:         trigger FIM completion automatically on cursor movement
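
For reference, the options above are fields of the g:llama_config dictionary. A minimal sketch of a user override, with the keys taken from the comments above and purely illustrative values (not the plugin's defaults):

    " hypothetical user config -- keys as documented above, values illustrative
    let g:llama_config = {
        \ 'n_prefix':         256,
        \ 'n_suffix':         64,
        \ 'n_predict':        128,
        \ 't_max_prompt_ms':  500,
        \ 't_max_predict_ms': 1000,
        \ 'show_info':        2,
        \ 'auto_fim':         v:true,
        \ }
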
@@ -99,8 +99,8 @@ function! llama#init()
         return
     endif
 
-    let s:pos_x = 0 " cursor position upon start of completion
-    let s:pos_y = 0
+    let s:pos_x = 0  " cursor position upon start of completion
+    let s:pos_y = 0
 
     let s:line_cur = ''
 
@@ -329,8 +329,7 @@ function! llama#fim_inline(is_auto, on_hold) abort
 endfunction
 
 " the main FIM call
-" takes local context around the cursor and sends it together with the extra context
-" to the llama.cpp server for completion
+" takes local context around the cursor and sends it together with the extra context to the server for completion
 function! llama#fim(is_auto, on_hold) abort
     " we already have a suggestion for the current cursor position
     if a:on_hold && (s:hint_shown || (s:pos_x == col('.') - 1 && s:pos_y == line('.')))
@@ -569,13 +568,50 @@ function! s:fim_on_stdout(job_id, data, event) dict
     endif
 
     let s:pos_dx = len(s:content[-1])
-    let s:content[-1] .= s:line_cur_suffix
 
-    " truncate the suggestion if it repeats the following lines
-    if len(s:content) > 1 && s:content[1] == getline(s:pos_y + 1)
-        let s:content = [s:content[0]]
+    " NOTE: the following is logic for discarding predictions that repeat existing text
+    "       the code is quite ugly and there is very likely a simpler and more canonical way to implement this
+    "
+    "       still, I wonder if there is some better way that avoids having to do these special hacks?
+    "       on one hand, the LLM 'sees' the contents of the file before we start editing, so it is normal that it would
+    "       start generating whatever we have given it via the extra context. but on the other hand, it's not very
+    "       helpful to re-generate the same code that is already there
+
+    " truncate the suggestion if the first line is empty
+    if s:content[0] == ""
+        let s:content = [""]
+    endif
+
+    " truncate the suggestion if it repeats the suffix
+    if len(s:content) == 1 && s:content[0] == s:line_cur_suffix
+        let s:content = [""]
     endif
 
+    " find the first non-empty line (strip whitespace)
+    let l:cmp_y = s:pos_y + 1
+    while l:cmp_y < line('$') && getline(l:cmp_y) =~? '^\s*$'
+        let l:cmp_y += 1
+    endwhile
+
+    if (s:line_cur_prefix . s:content[0]) == getline(l:cmp_y)
+        " truncate the suggestion if it repeats the next line
+        if len(s:content) == 1
+            let s:content = [""]
+        endif
+
+        " ... or if the second line of the suggestion is the prefix of line l:cmp_y + 1
+        if len(s:content) == 2 && s:content[-1] == getline(l:cmp_y + 1)[:len(s:content[-1]) - 1]
+            let s:content = [""]
+        endif
+
+        " ... or if the middle chunk of lines of the suggestion is the same as [l:cmp_y + 1, l:cmp_y + len(s:content) - 1)
+        if len(s:content) > 2 && join(s:content[1:-1], "\n") == join(getline(l:cmp_y + 1, l:cmp_y + len(s:content) - 1), "\n")
+            let s:content = [""]
+        endif
+    endif
+
+    let s:content[-1] .= s:line_cur_suffix
+
     call llama#fim_cancel()
 
     " display virtual text with the suggestion
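
The shape of the new discard rule is easier to see outside the plugin. A minimal standalone sketch (hypothetical helper, not part of the patch) of the multi-line case handled above:

    " does the suggestion merely retype what is already in the buffer?
    " a:prefix  - text on the cursor line before the cursor (s:line_cur_prefix)
    " a:content - list of suggested lines returned by the server
    " a:cmp_y   - first non-empty buffer line below the cursor
    function! s:IsRepetition(prefix, content, cmp_y) abort
        " the first suggested line, glued to what is already typed,
        " must reproduce the buffer line it would land on
        if (a:prefix . a:content[0]) !=# getline(a:cmp_y)
            return v:false
        endif
        " the remaining suggested lines must match the buffer lines below;
        " for a single-line suggestion both joins are empty strings
        return join(a:content[1:], "\n") ==# join(getline(a:cmp_y + 1, a:cmp_y + len(a:content) - 1), "\n")
    endfunction
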
@@ -595,9 +631,9 @@ function! s:fim_on_stdout(job_id, data, event) dict
             \ l:n_cached, l:n_ctx
             \ )
     else
-        let l:info = printf("%s | context: %d / %d / r=%d / q=%d / e=%d | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %.2f ms",
+        let l:info = printf("%s | c: %d / %d, r: %d, e: %d, q: %d | p: %d (%.2f ms, %.2f t/s) | g: %d (%.2f ms, %.2f t/s) | t: %.2f ms",
             \ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',
-            \ l:n_cached, l:n_ctx, len(s:ring_chunks), len(s:ring_queued), s:ring_n_evict,
+            \ l:n_cached, l:n_ctx, len(s:ring_chunks), s:ring_n_evict, len(s:ring_queued),
             \ l:n_prompt, l:t_prompt_ms, l:s_prompt,
             \ l:n_predict, l:t_predict_ms, l:s_predict,
             \ 1000.0 * reltimefloat(reltime(s:t_fim_start))
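
Judging by the printf arguments, the abbreviated fields are: c - cached/context tokens, r - ring-buffer chunks, e - evicted chunks, q - queued chunks, p - prompt processing, g - generation, t - total time. With illustrative numbers, the new format renders roughly as:

    llama.vim | c: 512 / 8192, r: 3, e: 0, q: 1 | p: 45 (12.40 ms, 3629.03 t/s) | g: 24 (181.10 ms, 132.52 t/s) | t: 210.55 ms
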
@@ -627,7 +663,7 @@ function! s:fim_on_stdout(job_id, data, event) dict
         \ 'virt_text_win_col': virtcol('.')
         \ })
 
-    " setup accept/cancel events
+    " setup accept shortcuts
     inoremap <buffer> <Tab>   <C-O>:call llama#fim_accept(v:false)<CR>
     inoremap <buffer> <S-Tab> <C-O>:call llama#fim_accept(v:true)<CR>
 
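
Both mappings are buffer-local and use <C-O> to run a single normal-mode command from insert mode; per the plugin's README, Tab accepts the whole suggestion while Shift+Tab accepts only its first line. If Tab clashes with another completion plugin, the same calls can presumably be bound to other keys (illustrative, not part of the patch):

    " hypothetical alternative bindings for the accept actions
    inoremap <buffer> <C-J> <C-O>:call llama#fim_accept(v:false)<CR>
    inoremap <buffer> <C-K> <C-O>:call llama#fim_accept(v:true)<CR>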