@@ -98,7 +98,8 @@ function! llama#init()
     let s:line_cur_prefix = ''
     let s:line_cur_suffix = ''
 
-    let s:ring_n_chunks = []
+    let s:ring_chunks = []
+    let s:ring_n_evict = 0
 
     let s:pos_y_pick = -9999 " last y where we picked a chunk
     let s:pos_dx = 0
@@ -128,6 +129,25 @@ function! llama#init()
     silent! call llama#fim_cancel()
 endfunction
 
+" TODO: figure out something better
+function! s:chunk_sim(c0, c1)
+    let l:lines0 = len(a:c0)
+    let l:lines1 = len(a:c1)
+
+    let l:common = 0
+
+    for l:line0 in a:c0
+        for l:line1 in a:c1
+            if l:line0 == l:line1
+                let l:common += 1
+                break
+            endif
+        endfor
+    endfor
+
+    return 2.0 * l:common / (l:lines0 + l:lines1)
+endfunction
+
 function! s:pick_chunk(text, no_mod)
     " do not pick chunks from buffers with pending changes or buffers that are not files
     if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
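
Note: the new s:chunk_sim helper scores two chunks by how many lines of the first appear anywhere in the second, normalized Dice-style into [0, 1]. A minimal standalone sketch (the global ChunkSim name is hypothetical, since script-local functions cannot be called from outside the script):

    " Dice-style line overlap: 2 * |common| / (|c0| + |c1|)
    function! ChunkSim(c0, c1)
        let l:common = 0
        for l:line0 in a:c0
            " count each line of c0 that occurs anywhere in c1
            for l:line1 in a:c1
                if l:line0 == l:line1
                    let l:common += 1
                    break
                endif
            endfor
        endfor
        return 2.0 * l:common / (len(a:c0) + len(a:c1))
    endfunction

    echo ChunkSim(['a', 'b'], ['a', 'b'])   " identical   -> 1.0
    echo ChunkSim(['a', 'b'], ['b', 'c'])   " half shared -> 0.5
    echo ChunkSim(['a', 'b'], ['c', 'd'])   " disjoint    -> 0.0
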
@@ -138,20 +158,25 @@ function! s:pick_chunk(text, no_mod)
         return
     endif
 
+    if len(a:text) < 3
+        return
+    endif
+
     if len(a:text) + 1 < g:llama_config.ring_chunk_size
-        let l:chunk = join(a:text, "\n")
+        let l:chunk = a:text
     else
-        let l:l0 = s:rand(0, len(a:text) - g:llama_config.ring_chunk_size)
-        let l:l1 = l:l0 + g:llama_config.ring_chunk_size
+        let l:l0 = s:rand(0, max([0, len(a:text) - g:llama_config.ring_chunk_size]))
+        let l:l1 = min([l:l0 + g:llama_config.ring_chunk_size, len(a:text)])
 
-        let l:chunk = join(a:text[l:l0:l:l1], "\n")
+        let l:chunk = a:text[l:l0:l:l1]
     endif
 
+    let l:chunk_str = join(l:chunk, "\n")
+
     " check if this chunk is already added
-    " TODO: smarter check for string similarity to evict old chunks that are very similart to the new one
     let l:exist = v:false
-    for i in range(len(s:ring_n_chunks))
-        if s:ring_n_chunks[i] == l:chunk
+    for i in range(len(s:ring_chunks))
+        if s:ring_chunks[i].data == l:chunk
             let l:exist = v:true
             break
         endif
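
The max()/min() clamps fix an off-by-one in the old bounds: a buffer of exactly ring_chunk_size - 1 lines falls through to the else branch, where the old code would call s:rand(0, -1). A quick sketch of the clamped window, with ring_chunk_size assumed to be 64:

    let text = range(63)                 " 63-line buffer
    let size = 64

    echo len(text) - size                " -1: old upper bound for s:rand

    let l0 = max([0, len(text) - size])  " 0: s:rand(0, 0) can only pick 0
    let l1 = min([l0 + size, len(text)]) " 63: window end capped at len
    echo text[l0 : l1] == text           " 1: the whole buffer is taken
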
@@ -161,11 +186,19 @@ function! s:pick_chunk(text, no_mod)
         return
     endif
 
-    if len(s:ring_n_chunks) == g:llama_config.ring_n_chunks
-        call remove(s:ring_n_chunks, 0)
+    " evict chunks that are very similar to the new one
+    for i in range(len(s:ring_chunks) - 1, 0, -1)
+        if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.9
+            call remove(s:ring_chunks, i)
+            let s:ring_n_evict += 1
+        endif
+    endfor
+
+    if len(s:ring_chunks) == g:llama_config.ring_n_chunks
+        call remove(s:ring_chunks, 0)
     endif
 
-    call add(s:ring_n_chunks, l:chunk)
+    call add(s:ring_chunks, {'data': l:chunk, 'str': l:chunk_str, 'time': reltime()})
 endfunction
 
 function! llama#fim(is_auto) abort
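
Side note on the eviction pass: it walks the ring from the back because remove() shifts every later element left, so a forward loop would skip the element that slides into the freed slot. A self-contained illustration:

    let ring = ['keep', 'evict', 'evict', 'keep']
    for i in range(len(ring) - 1, 0, -1)
        if ring[i] == 'evict'
            call remove(ring, i)
        endif
    endfor
    echo ring   " ['keep', 'keep']
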
@@ -213,6 +246,12 @@ function! llama#fim(is_auto) abort
         let s:pos_y_pick = s:pos_y
     endif
 
+    " array of strings
+    let l:extra_context = []
+    for l:chunk in s:ring_chunks
+        call add(l:extra_context, l:chunk.str)
+    endfor
+
     let l:request = json_encode({
         \ 'prompt':           "",
         \ 'input_prefix':     l:prefix,
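
For illustration, the newline-joined .str fields are what actually goes over the wire; with two (hypothetical) chunks in the ring, the encoded field looks like this:

    let extra_context = ["let s:x = 1\nlet s:y = 2", "function! F()\nendfunction"]
    echo json_encode({'extra_context': extra_context})
    " {"extra_context":["let s:x = 1\nlet s:y = 2","function! F()\nendfunction"]}
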
@@ -223,7 +262,7 @@ function! llama#fim(is_auto) abort
         \ 'stream':           v:false,
         \ 'samplers':         ["top_k", "infill"],
         \ 'cache_prompt':     v:true,
-        \ 'extra_context':    s:ring_n_chunks,
+        \ 'extra_context':    l:extra_context,
         \ 't_max_prompt_ms':  g:llama_config.t_max_prompt_ms,
         \ 't_max_predict_ms': g:llama_config.t_max_predict_ms
         \ })
@@ -418,9 +457,9 @@ function! s:fim_on_stdout(job_id, data, event) dict
         " prefix the info string with whitespace in order to offset it to the right of the fim overlay
         let l:prefix = repeat(' ', len(s:content[0]) - len(s:line_cur_suffix) + 3)
 
-        let l:info = printf("%s | context: %d / %d | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %.2f ms",
+        let l:info = printf("%s | context: %d / %d / %d / %d | prompt: %d (%.2f ms, %.2f t/s) | predict: %d (%.2f ms, %.2f t/s) | total: %.2f ms",
             \ g:llama_config.show_info == 2 ? l:prefix : '',
-            \ l:n_cached, l:n_ctx,
+            \ l:n_cached, l:n_ctx, len(s:ring_chunks), s:ring_n_evict,
             \ l:n_prompt, l:t_prompt_ms, l:s_prompt,
             \ l:n_predict, l:t_predict_ms, l:s_predict,
             \ 1000.0 * reltimefloat(reltime(s:t_fim_start))
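
With this change the overlay's "context" field reports four numbers instead of two: cached tokens / context size / live ring chunks / total evictions.

    " example rendering (values hypothetical):
    "   context: 1024 / 8192 / 5 / 2
    "   -> n_cached=1024, n_ctx=8192, len(s:ring_chunks)=5, s:ring_n_evict=2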