|
10 | 10 | from .dequant import is_quantized, dequantize_tensor |
11 | 11 |
|
12 | 12 | IMG_ARCH_LIST = {"flux", "sd1", "sdxl", "sd3", "aura", "hidream", "cosmos", "ltxv", "hyvid", "wan", "lumina2", "qwen_image"} |
13 | | -TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl"} |
| 13 | +TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl", "qwen3"} |
14 | 14 | VIS_TYPE_LIST = {"clip-vision", "mmproj"} |
15 | 15 |
|
16 | 16 | def get_orig_shape(reader, tensor_name): |
@@ -157,6 +157,9 @@ def gguf_sd_loader(path, handle_prefix="model.diffusion_model.", return_arch=Fal |
157 | 157 | LLAMA_SD_MAP = { |
158 | 158 | "blk.": "model.layers.", |
159 | 159 | "attn_norm": "input_layernorm", |
| 160 | + "attn_q_norm.": "self_attn.q_norm.", |
| 161 | + "attn_k_norm.": "self_attn.k_norm.", |
| 162 | + "attn_v_norm.": "self_attn.v_norm.", |
160 | 163 | "attn_q": "self_attn.q_proj", |
161 | 164 | "attn_k": "self_attn.k_proj", |
162 | 165 | "attn_v": "self_attn.v_proj", |
@@ -335,7 +338,7 @@ def gguf_clip_loader(path): |
335 | 338 | logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.") |
336 | 339 | sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16) |
337 | 340 | sd = sd_map_replace(sd, T5_SD_MAP) |
338 | | - elif arch in {"llama", "qwen2vl"}: |
| 341 | + elif arch in {"llama", "qwen2vl", "qwen3"}: |
339 | 342 | # TODO: pass model_options["vocab_size"] to loader somehow |
340 | 343 | temb_key = "token_embd.weight" |
341 | 344 | if temb_key in sd and sd[temb_key].shape[0] >= (64 * 1024): |
|
0 commit comments