8 | 8 | from .dequant import is_quantized, dequantize_tensor |
9 | 9 |
10 | 10 | IMG_ARCH_LIST = {"flux", "sd1", "sdxl", "sd3", "aura", "hidream", "cosmos", "ltxv", "hyvid", "wan", "lumina2", "qwen_image"} |
11 | | -TXT_ARCH_LIST = {"t5", "t5encoder", "llama"} |
| 11 | +TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl"} |
12 | 12 |
13 | 13 | def get_orig_shape(reader, tensor_name): |
14 | 14 |     field_key = f"comfy.gguf.orig_shape.{tensor_name}"
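
The change above adds "qwen2vl" to TXT_ARCH_LIST so Qwen2-VL GGUF checkpoints are routed down the text-encoder path alongside T5 and LLaMA. As a rough sketch of how these two sets are typically consulted against the GGUF `general.architecture` value (the helper name `classify_arch` and its error handling are assumptions for illustration, not code from this repo):

```python
# Sets as defined above in loader.py after this change.
IMG_ARCH_LIST = {"flux", "sd1", "sdxl", "sd3", "aura", "hidream", "cosmos",
                 "ltxv", "hyvid", "wan", "lumina2", "qwen_image"}
TXT_ARCH_LIST = {"t5", "t5encoder", "llama", "qwen2vl"}

def classify_arch(arch: str) -> str:
    # Hypothetical routing helper: decide which loader path a GGUF
    # "general.architecture" value should take.
    if arch in IMG_ARCH_LIST:
        return "diffusion_model"
    if arch in TXT_ARCH_LIST:
        return "text_encoder"
    raise ValueError(f"Unexpected GGUF architecture: {arch}")

assert classify_arch("qwen2vl") == "text_encoder"
```
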
@@ -244,15 +244,16 @@ def gguf_clip_loader(path): |
244 | 244 |             logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.")
245 | 245 |             sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16)
246 | 246 |         sd = sd_map_replace(sd, T5_SD_MAP)
247 | | -    elif arch in {"llama"}:
| 247 | +    elif arch in {"llama", "qwen2vl"}:
248 | 248 |         # TODO: pass model_options["vocab_size"] to loader somehow
249 | 249 |         temb_key = "token_embd.weight"
250 | 250 |         if temb_key in sd and sd[temb_key].shape[0] >= (64 * 1024):
251 | 251 |             # See note above for T5.
252 | 252 |             logging.warning(f"Dequantizing {temb_key} to prevent runtime OOM.")
253 | 253 |             sd[temb_key] = dequantize_tensor(sd[temb_key], dtype=torch.float16)
254 | 254 |         sd = sd_map_replace(sd, LLAMA_SD_MAP)
255 | | -        sd = llama_permute(sd, 32, 8) # L3
| 255 | +        if arch == "llama":
| 256 | +            sd = llama_permute(sd, 32, 8) # L3
256 | 257 |     else:
257 | 258 |         pass
258 | 259 |     return sd
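
The new `if arch == "llama":` guard keeps the q/k head permutation for LLaMA-family checkpoints only; llama.cpp's LLaMA converter interleaves the two rotary halves of the attention projections, and Qwen2-VL GGUFs are presumably exported without that interleave, so they skip the step. A minimal sketch of what such a reverse permutation does (assumed tensor layout; the repo's actual llama_permute may differ in detail):

```python
import torch

def reverse_llama_permute(w: torch.Tensor, n_heads: int) -> torch.Tensor:
    # Undo the llama.cpp-style interleave of the two rotary halves on a
    # q_proj / k_proj weight of shape (n_heads * head_dim, hidden_size).
    return (w.reshape(n_heads, w.shape[0] // n_heads // 2, 2, *w.shape[1:])
             .swapaxes(1, 2)
             .reshape(w.shape))

# Per the diff's "# L3" comment: 32 query heads and 8 KV heads (Llama 3 style),
# shown here with illustrative 4096-dim hidden size.
q = reverse_llama_permute(torch.randn(4096, 4096), n_heads=32)
k = reverse_llama_permute(torch.randn(1024, 4096), n_heads=8)
```
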