Skip to content

Commit 7e21c0f

Browse files
committed
Minor fix of output: revert rotary embedding to the HF (Hugging Face) rotary implementation
Signed-off-by: Dipankar Sarkar <dipankar@qti.qualcomm.com>
1 parent 9615ced commit 7e21c0f

File tree

2 files changed

+109
-100
lines changed

2 files changed

+109
-100
lines changed

QEfficient/transformers/models/pytorch_transforms.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@
175175
Qwen3VLTextDecoderLayer,
176176
Qwen3VLTextModel,
177177
Qwen3VLTextRMSNorm,
178-
Qwen3VLTextRotaryEmbedding,
179178
Qwen3VLVisionAttention,
180179
Qwen3VLVisionModel,
181180
)
@@ -393,7 +392,7 @@
393392
QEffQwen3VLTextAttention,
394393
QEffQwen3VLTextDecoderLayer,
395394
QEffQwen3VLTextModel,
396-
QEffQwen3VLTextRotaryEmbedding,
395+
# QEffQwen3VLTextRotaryEmbedding,
397396
QEffQwen3VLVisionAttention,
398397
QEffQwen3VLVisionModel,
399398
)
@@ -588,17 +587,15 @@ class KVCacheTransform(ModuleMappingTransform):
588587
Qwen2_5_VLDecoderLayer: QEffQwen2_5_VLDecoderLayer,
589588
Qwen2_5_VisionTransformerPretrainedModel: QEffQwen2_5_VisionTransformerPretrainedModel,
590589
Qwen2_5_VLVisionAttention: QEffQwen2_5_VLVisionAttention,
591-
592-
593-
#Qwen3vl
590+
# Qwen3vl
594591
Qwen3VLForConditionalGeneration: QEffQwen3VLForConditionalGeneration,
595592
Qwen3VLModel: QEffQwen3VLModel,
596593
Qwen3VLTextAttention: QEffQwen3VLTextAttention,
597594
Qwen3VLTextDecoderLayer: QEffQwen3VLTextDecoderLayer,
598595
Qwen3VLVisionAttention: QEffQwen3VLVisionAttention,
599596
Qwen3VLVisionModel: QEffQwen3VLVisionModel,
600597
Qwen3VLTextModel: QEffQwen3VLTextModel,
601-
Qwen3VLTextRotaryEmbedding: QEffQwen3VLTextRotaryEmbedding, # reusing decoder layer for rotary embedding as they are tightly coupled in forward pass
598+
# Qwen3VLTextRotaryEmbedding: QEffQwen3VLTextRotaryEmbedding, # reusing decoder layer for rotary embedding as they are tightly coupled in forward pass
602599
# Starcoder2
603600
Starcoder2Attention: QEffStarcoder2Attention,
604601
Starcoder2DecoderLayer: QEFFStarcoder2DecoderLayer,
@@ -856,4 +853,4 @@ def apply(cls, model: nn.Module, num_kv_blocks) -> Tuple[nn.Module, bool]:
856853
transformed = True # Set to True if at least one transformation occurs
857854
elif module.__class__.__name__.endswith("Attention") and type(module) not in cls._module_mapping:
858855
warnings.warn(f"KV blocking is not yet supported for {type(module)}.")
859-
return model, transformed
856+
return model, transformed

0 commit comments

Comments
 (0)