info: cached info message (#21)

VJHack · ggerganov · web-flow · commit 3cc84b0d0183 · 2025-01-06T13:58:42.000+02:00
* added cached info message

* optimize space in cache

* comment clarification

* handle empty caches

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
diff --git a/autoload/llama.vim b/autoload/llama.vim
@@ -586,30 +586,35 @@ function! s:insert_cache(key, value)
         let l:hash = l:keys[rand() % len(l:keys)]
         call remove(g:result_cache, l:hash)
     endif
-    let g:result_cache[a:key] = a:value
+    " put just the raw content in the cache without metrics
+    let l:parsed_value = json_decode(a:value)
+    let l:stripped_content = get(l:parsed_value, 'content', '')
+    let g:result_cache[a:key] = json_encode({'content': l:stripped_content})
 endfunction
 
 " callback that processes the FIM result from the server and displays the suggestion
 function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, event = v:null)
+    " make sure cursor position hasn't changed since fim_on_stdout was triggered
+    if a:pos_x != col('.') - 1 || a:pos_y != line('.')
+        return
+    endif
+
+    " show the suggestion only in insert mode
+    if mode() !=# 'i'
+        return
+    endif
+
     " Retrieve the FIM result from cache
     if a:cache && has_key(g:result_cache, a:hash)
         let l:raw = get(g:result_cache, a:hash)
+        let l:is_cached = v:true
     else
         if s:ghost_text_nvim
             let l:raw = join(a:data, "\n")
         elseif s:ghost_text_vim
             let l:raw = a:data
         endif
-        call s:insert_cache(a:hash, l:raw)
-    endif
-
-    if a:pos_x != col('.') - 1 || a:pos_y != line('.')
-        return
-    endif
-
-    " show the suggestion only in insert mode
-    if mode() !=# 'i'
-        return
+        let l:is_cached = v:false
     endif
 
     " TODO: this does not seem to work as expected, so disabling for now
@@ -623,6 +628,10 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
         return
     endif
 
+    if !l:is_cached
+        call s:insert_cache(a:hash, l:raw)
+    endif
+
     let s:pos_x = a:pos_x
     let s:pos_y = a:pos_y
 
@@ -669,6 +678,11 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
             let l:t_predict_ms = get(l:timings, 'predicted_ms', 1)
             let l:s_predict    = get(l:timings, 'predicted_per_second', 0)
         endif
+
+        " if response was pulled from cache
+        if l:is_cached
+            let l:has_info = v:true
+        endif
     endif
 
     if len(s:content) == 0
@@ -759,6 +773,12 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
                 \ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',
                 \ l:n_cached, l:n_ctx
                 \ )
+        elseif l:is_cached
+            let l:info = printf("%s | C: %d / %d, | t: %.2f ms",
+                \ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',
+                \ len(keys(g:result_cache)), g:llama_config.max_cache_keys,
+                \ 1000.0 * reltimefloat(reltime(s:t_fim_start))
+                \ )
         else
             let l:info = printf("%s | c: %d / %d, r: %d / %d, e: %d, q: %d / 16 | p: %d (%.2f ms, %.2f t/s) | g: %d (%.2f ms, %.2f t/s) | t: %.2f ms",
                 \ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',