8 | 8 | from .dequant import is_quantized, dequantize_tensor |
9 | 9 |
10 | 10 | IMG_ARCH_LIST = {"flux", "sd1", "sdxl", "sd3", "aura", "hidream", "cosmos", "ltxv", "hyvid", "wan", "lumina2", "qwen_image"} |
11 | | -TXT_ARCH_LIST = {"t5", "t5encoder", "llama"} |
| 11 | +TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl"} |
12 | 12 |
13 | 13 | def get_orig_shape(reader, tensor_name): |
14 | 14 |     field_key = f"comfy.gguf.orig_shape.{tensor_name}"
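
The change above adds "qwen2vl" to TXT_ARCH_LIST so Qwen2-VL GGUF checkpoints are routed down the text-encoder path alongside T5 and LLaMA. As a rough sketch of how these two sets are typically consulted against the GGUF `general.architecture` value (the helper name `classify_arch` and its error handling are assumptions for illustration, not code from this repo):

```python
# Sets as defined above in loader.py after this change.
IMG_ARCH_LIST = {"flux", "sd1", "sdxl", "sd3", "aura", "hidream", "cosmos",
                 "ltxv", "hyvid", "wan", "lumina2", "qwen_image"}
TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl"}

def classify_arch(arch: str) -> str:
    # Hypothetical routing helper: decide which loader path a GGUF
    # "general.architecture" value should take.
    if arch in IMG_ARCH_LIST:
        return "diffusion_model"
    if arch in TXT_ARCH_LIST:
        return "text_encoder"
    raise ValueError(f"Unexpected GGUF architecture: {arch}")

assert classify_arch("qwen2vl") == "text_encoder"
```
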
@@ -244,15 +244,16 @@ def gguf_clip_loader(path): |
244 | 244 |             logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.")
245 | 245 |             sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16)
246 | 246 |         sd = sd_map_replace(sd, T5_SD_MAP)
247 | | -    elif arch in {"llama"}:
| 247 | +    elif arch in {"llama", "qwen2vl"}:
248 | 248 |         # TODO: pass model_options["vocab_size"] to loader somehow
249 | 249 |         temb_key = "token_embd.weight"
250 | 250 |         if temb_key in sd and sd[temb_key].shape[0] >= (64 * 1024):
251 | 251 |             # See note above for T5.
252 | 252 |             logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.")
253 | 253 |             sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16)
254 | 254 |         sd = sd_map_replace(sd, LLAMA_SD_MAP)
255 | | -        sd = llama_permute(sd, 32, 8) # L3
| 255 | +        if arch == "llama":
| 256 | +            sd = llama_permute(sd, 32, 8) # L3
256 | 257 |     else:
257 | 258 |         pass
258 | 259 |     return sd
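
The new `if arch == "llama":` guard keeps the q/k head permutation for LLaMA-family checkpoints only; llama.cpp's LLaMA converter interleaves the two rotary halves of the attention projections, and Qwen2-VL GGUFs are presumably exported without that interleave, so they skip the step. A minimal sketch of what such a reverse permutation does (assumed tensor layout; the repo's actual llama_permute may differ in detail):

```python
import torch

def reverse_llama_permute(w: torch.Tensor, n_heads: int) -> torch.Tensor:
    # Undo the llama.cpp-style interleave of the two rotary halves on a
    # q_proj / k_proj weight of shape (n_heads * head_dim, hidden_size).
    return (w.reshape(n_heads, w.shape[0] // n_heads // 2, 2, *w.shape[1:])
             .swapaxes(1, 2)
             .reshape(w.shape))

# Per the diff's "# L3" comment: 32 query heads and 8 KV heads (Llama 3 style),
# shown here with illustrative 4096-dim hidden size.
q = reverse_llama_permute(torch.randn(4096, 4096), n_heads=32)
k = reverse_llama_permute(torch.randn(1024, 4096), n_heads=8)
```
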