|
| 1 | +use std::path::Path; |
| 2 | + |
| 3 | +use mlx_lm::{cache::ConcatKeyValueCache, models::qwen3::load_qwen3_model}; |
| 4 | +use mlx_lm_utils::tokenizer::{ |
| 5 | + load_model_chat_template_from_file, ApplyChatTemplateArgs, Conversation, Role, Tokenizer, |
| 6 | +}; |
| 7 | +use mlx_rs::{ |
| 8 | + ops::indexing::{IndexOp, NewAxis}, |
| 9 | + transforms::eval, |
| 10 | + Array, |
| 11 | +}; |
| 12 | + |
// Local directory holding the pre-downloaded Qwen3-4B (bf16) model snapshot
// (tokenizer.json, tokenizer_config.json, and weights) — populated out of band.
const CACHED_TEST_MODEL_DIR: &str = "./cache/Qwen3-4B-bf16";
| 14 | + |
| 15 | +fn qwen3() -> anyhow::Result<()> { |
| 16 | + let model_dir = Path::new(CACHED_TEST_MODEL_DIR); |
| 17 | + |
| 18 | + let model_id = "mlx-community/Qwen3-4B-bf16".to_string(); |
| 19 | + let tokenizer_file = model_dir.join("tokenizer.json"); |
| 20 | + let tokenizer_config_file = model_dir.join("tokenizer_config.json"); |
| 21 | + let mut tokenizer = |
| 22 | + Tokenizer::from_file(tokenizer_file).map_err(|e| anyhow::anyhow!("{:?}", e))?; |
| 23 | + let model_chat_template = load_model_chat_template_from_file(tokenizer_config_file)? |
| 24 | + .expect("Model chat template not found"); |
| 25 | + |
| 26 | + let conversations = vec![Conversation { |
| 27 | + role: Role::User, |
| 28 | + content: "what's your name?", |
| 29 | + }]; |
| 30 | + let args = ApplyChatTemplateArgs { |
| 31 | + conversations: vec![conversations.into()], |
| 32 | + documents: None, |
| 33 | + model_id: &model_id, |
| 34 | + chat_template_id: None, |
| 35 | + add_generation_prompt: None, |
| 36 | + continue_final_message: None, |
| 37 | + }; |
| 38 | + let encodings = tokenizer.apply_chat_template_and_encode(model_chat_template, args)?; |
| 39 | + let prompt: Vec<u32> = encodings |
| 40 | + .iter() |
| 41 | + .flat_map(|encoding| encoding.get_ids()) |
| 42 | + .copied() |
| 43 | + .collect(); |
| 44 | + let prompt_tokens = Array::from(&prompt[..]).index(NewAxis); |
| 45 | + |
| 46 | + let mut cache = Vec::new(); |
| 47 | + let mut model = load_qwen3_model(model_dir)?; |
| 48 | + let generate = mlx_lm::models::qwen3::Generate::<ConcatKeyValueCache>::new( |
| 49 | + &mut model, |
| 50 | + &mut cache, |
| 51 | + 0.2, |
| 52 | + &prompt_tokens, |
| 53 | + ); |
| 54 | + |
| 55 | + let mut tokens = Vec::new(); |
| 56 | + for (token, ntoks) in generate.zip(0..256) { |
| 57 | + let token = token.unwrap(); |
| 58 | + tokens.push(token.clone()); |
| 59 | + |
| 60 | + if ntoks == 0 { |
| 61 | + eval(&tokens).unwrap(); |
| 62 | + } |
| 63 | + |
| 64 | + if tokens.len() % 20 == 0 { |
| 65 | + eval(&tokens).unwrap(); |
| 66 | + let slice: Vec<u32> = tokens.drain(..).map(|t| t.item::<u32>()).collect(); |
| 67 | + let s = tokenizer.decode(&slice, true).unwrap(); |
| 68 | + print!("{s}"); |
| 69 | + } |
| 70 | + } |
| 71 | + |
| 72 | + eval(&tokens).unwrap(); |
| 73 | + let slice: Vec<u32> = tokens.drain(..).map(|t| t.item::<u32>()).collect(); |
| 74 | + let s = tokenizer.decode(&slice, true).unwrap(); |
| 75 | + println!("{s}"); |
| 76 | + |
| 77 | + println!("------"); |
| 78 | + |
| 79 | + Ok(()) |
| 80 | +} |
| 81 | + |
| 82 | +fn main() -> anyhow::Result<()> { |
| 83 | + qwen3() |
| 84 | +} |
0 commit comments