Skip to content

Commit 9f5cadd

Browse files
VJHack and ggerganov authored
llama.vim: filter server response fields (#24)
* filter response fields
* clean up
* response fields for ring update
* formatting change
* Update autoload/llama.vim formatting comma

Co-authored-by: Georgi Gerganov <[email protected]>

* removed n_ctx

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 3ffd252 commit 9f5cadd

File tree

1 file changed

+30
-20
lines changed

1 file changed

+30
-20
lines changed

autoload/llama.vim

Lines changed: 30 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -307,9 +307,9 @@ function! s:ring_update()
307307
\ 'samplers': ["temperature"],
308308
\ 'cache_prompt': v:true,
309309
\ 't_max_prompt_ms': 1,
310-
\ 't_max_predict_ms': 1
310+
\ 't_max_predict_ms': 1,
311+
\ 'response_fields': [""]
311312
\ })
312-
313313
let l:curl_command = [
314314
\ "curl",
315315
\ "--silent",
@@ -420,7 +420,20 @@ function! llama#fim(is_auto, cache) abort
420420
\ 'samplers': ["top_k", "top_p", "infill"],
421421
\ 'cache_prompt': v:true,
422422
\ 't_max_prompt_ms': g:llama_config.t_max_prompt_ms,
423-
\ 't_max_predict_ms': g:llama_config.t_max_predict_ms
423+
\ 't_max_predict_ms': g:llama_config.t_max_predict_ms,
424+
\ 'response_fields': [
425+
\ "content",
426+
\ "timings/prompt_n",
427+
\ "timings/prompt_ms",
428+
\ "timings/prompt_per_token_ms",
429+
\ "timings/prompt_per_second",
430+
\ "timings/predicted_n",
431+
\ "timings/predicted_ms",
432+
\ "timings/predicted_per_token_ms",
433+
\ "timings/predicted_per_second",
434+
\ "truncated",
435+
\ "tokens_cached",
436+
\ ],
424437
\ })
425438

426439
let l:curl_command = [
@@ -662,24 +675,21 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
662675
call remove(s:content, -1)
663676
endwhile
664677

665-
let l:generation_settings = get(l:response, 'generation_settings', {})
666-
let l:n_ctx = get(l:generation_settings, 'n_ctx', 0)
667-
668-
let l:n_cached = get(l:response, 'tokens_cached', 0)
669-
let l:truncated = get(l:response, 'truncated', v:false)
678+
let l:n_cached = get(l:response, 'timings/tokens_cached', 0)
679+
let l:truncated = get(l:response, 'timings/truncated', v:false)
670680

671681
" if response.timings is available
672-
if len(get(l:response, 'timings', {})) > 0
682+
if has_key(l:response, 'timings/prompt_n') && has_key(l:response, 'timings/prompt_ms') && has_key(l:response, 'timings/prompt_per_second')
683+
\ && has_key(l:response, 'timings/predicted_n') && has_key(l:response, 'timings/predicted_ms') && has_key(l:response, 'timings/predicted_per_second')
673684
let l:has_info = v:true
674-
let l:timings = get(l:response, 'timings', {})
675685

676-
let l:n_prompt = get(l:timings, 'prompt_n', 0)
677-
let l:t_prompt_ms = get(l:timings, 'prompt_ms', 1)
678-
let l:s_prompt = get(l:timings, 'prompt_per_second', 0)
686+
let l:n_prompt = get(l:response, 'timings/prompt_n', 0)
687+
let l:t_prompt_ms = get(l:response, 'timings/prompt_ms', 1)
688+
let l:s_prompt = get(l:response, 'timings/prompt_per_second', 0)
679689

680-
let l:n_predict = get(l:timings, 'predicted_n', 0)
681-
let l:t_predict_ms = get(l:timings, 'predicted_ms', 1)
682-
let l:s_predict = get(l:timings, 'predicted_per_second', 0)
690+
let l:n_predict = get(l:response, 'timings/predicted_n', 0)
691+
let l:t_predict_ms = get(l:response, 'timings/predicted_ms', 1)
692+
let l:s_predict = get(l:response, 'timings/predicted_per_second', 0)
683693
endif
684694

685695
" if response was pulled from cache
@@ -772,9 +782,9 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
772782
let l:prefix = ' '
773783

774784
if l:truncated
775-
let l:info = printf("%s | WARNING: the context is full: %d / %d, increase the server context size or reduce g:llama_config.ring_n_chunks",
785+
let l:info = printf("%s | WARNING: the context is full: %d, increase the server context size or reduce g:llama_config.ring_n_chunks",
776786
\ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',
777-
\ l:n_cached, l:n_ctx
787+
\ l:n_cached
778788
\ )
779789
elseif l:is_cached
780790
let l:info = printf("%s | C: %d / %d, | t: %.2f ms",
@@ -783,9 +793,9 @@ function! s:fim_on_stdout(hash, cache, pos_x, pos_y, is_auto, job_id, data, even
783793
\ 1000.0 * reltimefloat(reltime(s:t_fim_start))
784794
\ )
785795
else
786-
let l:info = printf("%s | c: %d / %d, r: %d / %d, e: %d, q: %d / 16 | p: %d (%.2f ms, %.2f t/s) | g: %d (%.2f ms, %.2f t/s) | t: %.2f ms",
796+
let l:info = printf("%s | c: %d, r: %d / %d, e: %d, q: %d / 16 | p: %d (%.2f ms, %.2f t/s) | g: %d (%.2f ms, %.2f t/s) | t: %.2f ms",
787797
\ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',
788-
\ l:n_cached, l:n_ctx, len(s:ring_chunks), g:llama_config.ring_n_chunks, s:ring_n_evict, len(s:ring_queued),
798+
\ l:n_cached, len(s:ring_chunks), g:llama_config.ring_n_chunks, s:ring_n_evict, len(s:ring_queued),
789799
\ l:n_prompt, l:t_prompt_ms, l:s_prompt,
790800
\ l:n_predict, l:t_predict_ms, l:s_predict,
791801
\ 1000.0 * reltimefloat(reltime(s:t_fim_start))

0 commit comments

Comments
 (0)