@@ -24,6 +24,7 @@ func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama
2424actor LlamaContext {
2525 private var model : OpaquePointer
2626 private var context : OpaquePointer
27+ private var vocab : OpaquePointer
2728 private var sampling : UnsafeMutablePointer < llama_sampler >
2829 private var batch : llama_batch
2930 private var tokens_list : [ llama_token ]
@@ -47,6 +48,7 @@ actor LlamaContext {
4748 self . sampling = llama_sampler_chain_init ( sparams)
4849 llama_sampler_chain_add ( self . sampling, llama_sampler_init_temp ( 0.4 ) )
4950 llama_sampler_chain_add ( self . sampling, llama_sampler_init_dist ( 1234 ) )
51+ vocab = llama_model_get_vocab ( model)
5052 }
5153
5254 deinit {
@@ -79,7 +81,7 @@ actor LlamaContext {
7981 ctx_params. n_threads = Int32 ( n_threads)
8082 ctx_params. n_threads_batch = Int32 ( n_threads)
8183
82- let context = llama_new_context_with_model ( model, ctx_params)
84+ let context = llama_init_from_model ( model, ctx_params)
8385 guard let context else {
8486 print ( " Could not load context! " )
8587 throw LlamaError . couldNotInitializeContext
@@ -151,7 +153,7 @@ actor LlamaContext {
151153
152154 new_token_id = llama_sampler_sample ( sampling, context, batch. n_tokens - 1 )
153155
154- if llama_vocab_is_eog ( model , new_token_id) || n_cur == n_len {
156+ if llama_vocab_is_eog ( vocab , new_token_id) || n_cur == n_len {
155157 print ( " \n " )
156158 is_done = true
157159 let new_token_str = String ( cString: temporary_invalid_cchars + [ 0 ] )
@@ -297,7 +299,7 @@ actor LlamaContext {
297299 let utf8Count = text. utf8. count
298300 let n_tokens = utf8Count + ( add_bos ? 1 : 0 ) + 1
299301 let tokens = UnsafeMutablePointer< llama_token> . allocate( capacity: n_tokens)
300- let tokenCount = llama_tokenize ( model , text, Int32 ( utf8Count) , tokens, Int32 ( n_tokens) , add_bos, false )
302+ let tokenCount = llama_tokenize ( vocab , text, Int32 ( utf8Count) , tokens, Int32 ( n_tokens) , add_bos, false )
301303
302304 var swiftTokens : [ llama_token ] = [ ]
303305 for i in 0 ..< tokenCount {
@@ -316,15 +318,15 @@ actor LlamaContext {
316318 defer {
317319 result. deallocate ( )
318320 }
319- let nTokens = llama_token_to_piece ( model , token, result, 8 , 0 , false )
321+ let nTokens = llama_token_to_piece ( vocab , token, result, 8 , 0 , false )
320322
321323 if nTokens < 0 {
322324 let newResult = UnsafeMutablePointer< Int8> . allocate( capacity: Int ( - nTokens) )
323325 newResult. initialize ( repeating: Int8 ( 0 ) , count: Int ( - nTokens) )
324326 defer {
325327 newResult. deallocate ( )
326328 }
327- let nNewTokens = llama_token_to_piece ( model , token, newResult, - nTokens, 0 , false )
329+ let nNewTokens = llama_token_to_piece ( vocab , token, newResult, - nTokens, 0 , false )
328330 let bufferPointer = UnsafeBufferPointer ( start: newResult, count: Int ( nNewTokens) )
329331 return Array ( bufferPointer)
330332 } else {
0 commit comments