Skip to content

Commit c02981d

Browse files
committed
llama.vim : do not evict certain chunks [no ci]
1 parent bce0d5d commit c02981d

File tree

2 files changed

+21
-16
lines changed

2 files changed

+21
-16
lines changed

examples/llama.vim

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ let s:default_config = {
7171
\ 'show_info': 2,
7272
\ 'auto_fim': v:true,
7373
\ 'ring_n_chunks': 32,
74-
\ 'ring_chunk_size': 64,
74+
\ 'ring_chunk_size': 128,
7575
\ 'ring_scope': 1024,
7676
\ }
7777

@@ -119,11 +119,11 @@ function! llama#init()
119119

120120
autocmd CursorMoved * call llama#fim_cancel()
121121

122-
autocmd TextYankPost * if v:event.operator ==# 'y' | call s:pick_chunk(v:event.regcontents, v:false) | endif
122+
autocmd TextYankPost * if v:event.operator ==# 'y' | call s:pick_chunk(v:event.regcontents, v:false, v:true) | endif
123123

124124
" gather chunks upon entering/leaving a buffer
125-
autocmd BufEnter * call timer_start(100, {-> s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true)})
126-
autocmd BufLeave * call s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true)
125+
autocmd BufEnter * call timer_start(100, {-> s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true, v:true)})
126+
autocmd BufLeave * call s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true, v:true)
127127
augroup END
128128

129129
silent! call llama#fim_cancel()
@@ -148,7 +148,7 @@ function! s:chunk_sim(c0, c1)
148148
return 2.0 * l:common / (l:lines0 + l:lines1)
149149
endfunction
150150

151-
function! s:pick_chunk(text, no_mod)
151+
function! s:pick_chunk(text, no_mod, do_evict)
152152
" do not pick chunks from buffers with pending changes or buffers that are not files
153153
if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
154154
return
@@ -165,8 +165,8 @@ function! s:pick_chunk(text, no_mod)
165165
if len(a:text) + 1 < g:llama_config.ring_chunk_size
166166
let l:chunk = a:text
167167
else
168-
let l:l0 = s:rand(0, max([0, len(a:text) - g:llama_config.ring_chunk_size]))
169-
let l:l1 = min([l:l0 + g:llama_config.ring_chunk_size, len(a:text)])
168+
let l:l0 = s:rand(0, max([0, len(a:text) - g:llama_config.ring_chunk_size/2]))
169+
let l:l1 = min([l:l0 + g:llama_config.ring_chunk_size/2, len(a:text)])
170170

171171
let l:chunk = a:text[l:l0:l:l1]
172172
endif
@@ -189,8 +189,12 @@ function! s:pick_chunk(text, no_mod)
189189
" evict chunks that are very similar to the new one
190190
for i in range(len(s:ring_chunks) - 1, 0, -1)
191191
if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.9
192-
call remove(s:ring_chunks, i)
193-
let s:ring_n_evict += 1
192+
if a:do_evict
193+
call remove(s:ring_chunks, i)
194+
let s:ring_n_evict += 1
195+
else
196+
return
197+
endif
194198
endif
195199
endfor
196200

@@ -237,11 +241,12 @@ function! llama#fim(is_auto) abort
237241

238242
" only gather chunks if the cursor has moved a lot
239243
if a:is_auto && l:delta_y > 32
240-
" pick a prefix chunk
241-
call s:pick_chunk(getline(max([1, s:pos_y - g:llama_config.ring_scope]), max([1, s:pos_y - g:llama_config.n_prefix])), v:false)
242-
243-
" pick a suffix chunk
244-
call s:pick_chunk(getline(min([l:max_y, s:pos_y + g:llama_config.n_suffix]), min([l:max_y, s:pos_y + g:llama_config.ring_scope])), v:false)
244+
" randomly pick a prefix or a suffix chunk
245+
if s:rand(0, 1)
246+
call s:pick_chunk(getline(max([1, s:pos_y - g:llama_config.ring_scope]), max([1, s:pos_y - g:llama_config.n_prefix])), v:false, v:false)
247+
else
248+
call s:pick_chunk(getline(min([l:max_y, s:pos_y + g:llama_config.n_suffix]), min([l:max_y, s:pos_y + g:llama_config.ring_scope])), v:false, v:false)
249+
endif
245250

246251
let s:pos_y_pick = s:pos_y
247252
endif

examples/server/server.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1966,11 +1966,11 @@ struct server_context {
19661966
}
19671967

19681968
// for now pick FIM context to fit in half batch (ratio prefix:suffix = 3:1, TODO: configurable?)
1969-
const int n_suffix_take = std::min<int>(suffix_tokens.size(), (n_batch/4)/2);
1969+
const int n_suffix_take = std::min<int>(suffix_tokens.size(), (n_batch/2)/4);
19701970
const int n_prefix_take = std::min<int>(prefix_tokens.size(), (n_batch/2 - 3) - n_suffix_take);
19711971

19721972
// fill the rest of the context with extra chunks
1973-
const int n_extra_take = std::min<int>(std::max<int>(0, slot.n_ctx - n_batch - 2*slot.n_predict), slot.extra_tokens.size());
1973+
const int n_extra_take = std::min<int>(std::max<int>(0, slot.n_ctx - (n_batch/2) - 2*slot.n_predict), slot.extra_tokens.size());
19741974

19751975
prefix_tokens.erase(prefix_tokens.begin(), prefix_tokens.begin() + prefix_tokens.size() - n_prefix_take);
19761976
suffix_tokens.resize(n_suffix_take);

0 commit comments

Comments
 (0)