11//! This is a translation of simple.cpp in llama.cpp using llama-cpp-2.
2- #![ allow( clippy:: cast_possible_wrap, clippy:: cast_possible_truncation) ]
2+ #![ allow( clippy:: cast_possible_wrap, clippy:: cast_possible_truncation, clippy :: cast_precision_loss , clippy :: cast_sign_loss ) ]
33
4- use std:: io:: Write ;
5- use std:: num:: NonZeroU32 ;
6- use std:: path:: PathBuf ;
7- use std:: time:: Duration ;
4+ use anyhow:: { bail, Context , Result } ;
85use clap:: Parser ;
96use llama_cpp_2:: context:: params:: LlamaContextParams ;
10- use llama_cpp_2:: llama_backend:: LlamaBackend ;
11- use llama_cpp_2:: model:: LlamaModel ;
12- use llama_cpp_2:: model:: params:: LlamaModelParams ;
13- use anyhow:: { bail, Context , Result } ;
147use llama_cpp_2:: ggml_time_us;
8+ use llama_cpp_2:: llama_backend:: LlamaBackend ;
159use llama_cpp_2:: llama_batch:: LlamaBatch ;
16- use llama_cpp_2:: token :: data_array :: LlamaTokenDataArray ;
10+ use llama_cpp_2:: model :: params :: LlamaModelParams ;
1711use llama_cpp_2:: model:: AddBos ;
18-
12+ use llama_cpp_2:: model:: LlamaModel ;
13+ use llama_cpp_2:: token:: data_array:: LlamaTokenDataArray ;
14+ use std:: io:: Write ;
15+ use std:: num:: NonZeroU32 ;
16+ use std:: path:: PathBuf ;
17+ use std:: time:: Duration ;
1918
2019#[ derive( clap:: Parser ) ]
2120struct Args {
@@ -30,7 +29,6 @@ struct Args {
3029 disable_gpu : bool ,
3130}
3231
33-
3432fn main ( ) -> Result < ( ) > {
3533 let params = Args :: parse ( ) ;
3634
@@ -60,12 +58,14 @@ fn main() -> Result<()> {
6058 . with_n_ctx ( NonZeroU32 :: new ( 2048 ) )
6159 . with_seed ( 1234 ) ;
6260
63- let mut ctx = model. new_context ( & backend, ctx_params)
61+ let mut ctx = model
62+ . new_context ( & backend, ctx_params)
6463 . with_context ( || "unable to create the llama_context" ) ?;
6564
6665 // tokenize the prompt
6766
68- let tokens_list = model. str_to_token ( & params. prompt , AddBos :: Always )
67+ let tokens_list = model
68+ . str_to_token ( & params. prompt , AddBos :: Always )
6969 . with_context ( || format ! ( "failed to tokenize {}" , params. prompt) ) ?;
7070
7171 let n_cxt = ctx. n_ctx ( ) as i32 ;
@@ -75,8 +75,10 @@ fn main() -> Result<()> {
7575
7676 // make sure the KV cache is big enough to hold all the prompt and generated tokens
7777 if n_kv_req > n_cxt {
78- bail ! ( "n_kv_req > n_ctx, the required kv cache size is not big enough
79- either reduce n_len or increase n_ctx" )
78+ bail ! (
79+ "n_kv_req > n_ctx, the required kv cache size is not big enough
80+ either reduce n_len or increase n_ctx"
81+ )
8082 }
8183
8284 // print the prompt token-by-token
@@ -137,7 +139,6 @@ either reduce n_len or increase n_ctx")
137139 ctx. decode ( & mut batch) . with_context ( || "failed to eval" ) ?;
138140
139141 n_decode += 1 ;
140-
141142 }
142143
143144 eprintln ! ( "\n " ) ;
@@ -146,10 +147,14 @@ either reduce n_len or increase n_ctx")
146147
147148 let duration = Duration :: from_micros ( ( t_main_end - t_main_start) as u64 ) ;
148149
149- eprintln ! ( "decoded {} tokens in {:.2} s, speed {:.2} t/s\n " , n_decode, duration. as_secs_f32( ) , n_decode as f32 / duration. as_secs_f32( ) ) ;
150+ eprintln ! (
151+ "decoded {} tokens in {:.2} s, speed {:.2} t/s\n " ,
152+ n_decode,
153+ duration. as_secs_f32( ) ,
154+ n_decode as f32 / duration. as_secs_f32( )
155+ ) ;
150156
151157 println ! ( "{}" , ctx. timings( ) ) ;
152158
153159 Ok ( ( ) )
154-
155- }
160+ }
0 commit comments