Fix MRoPE model inference when no MM embeddings are present

turboderp · turboderp · commit 9cacd66229bc · 2024-11-20T05:49:03.000+01:00
diff --git a/exllamav2/model.py b/exllamav2/model.py
@@ -954,7 +954,7 @@ def forward_chunk(
             seq_len <= self.config.max_output_len, \
             "seq_len exceeds max_output_len"
 
-        if self.config.arch.lm.mrope and "indexed_embeddings" in kwargs:
+        if self.config.arch.lm.mrope and kwargs.get("indexed_embeddings"):
             assert attn_params is not None and (
                 attn_params.rope_offsets is not None or
                 attn_params.get_alt_rope_embed("cpu") is not None