175175 Qwen3VLTextDecoderLayer ,
176176 Qwen3VLTextModel ,
177177 Qwen3VLTextRMSNorm ,
178- Qwen3VLTextRotaryEmbedding ,
179178 Qwen3VLVisionAttention ,
180179 Qwen3VLVisionModel ,
181180)
393392 QEffQwen3VLTextAttention ,
394393 QEffQwen3VLTextDecoderLayer ,
395394 QEffQwen3VLTextModel ,
396- QEffQwen3VLTextRotaryEmbedding ,
395+ # QEffQwen3VLTextRotaryEmbedding,
397396 QEffQwen3VLVisionAttention ,
398397 QEffQwen3VLVisionModel ,
399398)
@@ -588,17 +587,15 @@ class KVCacheTransform(ModuleMappingTransform):
588587 Qwen2_5_VLDecoderLayer : QEffQwen2_5_VLDecoderLayer ,
589588 Qwen2_5_VisionTransformerPretrainedModel : QEffQwen2_5_VisionTransformerPretrainedModel ,
590589 Qwen2_5_VLVisionAttention : QEffQwen2_5_VLVisionAttention ,
591-
592-
593- #Qwen3vl
590+ # Qwen3vl
594591 Qwen3VLForConditionalGeneration : QEffQwen3VLForConditionalGeneration ,
595592 Qwen3VLModel : QEffQwen3VLModel ,
596593 Qwen3VLTextAttention : QEffQwen3VLTextAttention ,
597594 Qwen3VLTextDecoderLayer : QEffQwen3VLTextDecoderLayer ,
598595 Qwen3VLVisionAttention : QEffQwen3VLVisionAttention ,
599596 Qwen3VLVisionModel : QEffQwen3VLVisionModel ,
600597 Qwen3VLTextModel : QEffQwen3VLTextModel ,
601- Qwen3VLTextRotaryEmbedding : QEffQwen3VLTextRotaryEmbedding , # reusing decoder layer for rotary embedding as they are tightly coupled in forward pass
598+ # Qwen3VLTextRotaryEmbedding: QEffQwen3VLTextRotaryEmbedding, # reusing decoder layer for rotary embedding as they are tightly coupled in forward pass
602599 # Starcoder2
603600 Starcoder2Attention : QEffStarcoder2Attention ,
604601 Starcoder2DecoderLayer : QEFFStarcoder2DecoderLayer ,
@@ -856,4 +853,4 @@ def apply(cls, model: nn.Module, num_kv_blocks) -> Tuple[nn.Module, bool]:
856853 transformed = True # Set to True if at least one transformation occurs
857854 elif module .__class__ .__name__ .endswith ("Attention" ) and type (module ) not in cls ._module_mapping :
858855 warnings .warn (f"KV blocking is not yet supported for { type (module )} ." )
859- return model , transformed
856+ return model , transformed
0 commit comments