diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 2123fd9eba15..58737fa3ce0a 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -203,6 +203,15 @@ def __init__(
         )
         self.weight_block_size = weight_block_size
 
+    def __repr__(self) -> str:
+        return (
+            f"Fp8Config("
+            f"is_checkpoint_fp8_serialized={self.is_checkpoint_fp8_serialized}, "
+            f"activation_scheme={self.activation_scheme}, "
+            f"ignored_layers={self.ignored_layers}, "
+            f"weight_block_size={self.weight_block_size})"
+        )
+
     @classmethod
     def get_name(cls) -> QuantizationMethods:
         return "fp8"
diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py
index 2e9b279465f9..70c6ed6609ae 100644
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -302,7 +302,10 @@ def is_layer_skipped(
         is_skipped = None
         for shard_prefix in shard_prefixes:
-            is_shard_skipped = shard_prefix in ignored_layers
+            is_shard_skipped = shard_prefix in ignored_layers or any(
+                shard_prefix.startswith(ignored_layer)
+                for ignored_layer in ignored_layers
+            )
 
             if is_skipped is None:
                 is_skipped = is_shard_skipped
@@ -321,7 +324,9 @@ def is_layer_skipped(
             ]
         )
     else:
-        is_skipped = prefix in ignored_layers
+        is_skipped = prefix in ignored_layers or any(
+            prefix.startswith(ignored_layer) for ignored_layer in ignored_layers
+        )
 
     assert is_skipped is not None
     return is_skipped
diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py
index 8f73f2ff8263..8896b4942e47 100644
--- a/vllm/model_executor/models/ovis2_5.py
+++ b/vllm/model_executor/models/ovis2_5.py
@@ -456,7 +456,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.visual_tokenizer = VisualTokenizer(
             config=config.vit_config,
             visual_vocab_size=config.visual_vocab_size,
             quant_config=quant_config,
-            prefix=f"{prefix}.visual_tokenizer",
+            prefix=maybe_prefix(prefix, "visual_tokenizer"),
         )
         self.vte = VisualEmbedding(config.visual_vocab_size, config.hidden_size)
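
Illustration (not part of the patch): a minimal standalone sketch of the relaxed matching rule that is_layer_skipped applies after this change. The helper name and layer names below are hypothetical; only the membership-or-startswith expression mirrors the patched code.

def matches_ignored(prefix: str, ignored_layers: list[str]) -> bool:
    # Same rule as the patched is_layer_skipped: exact membership in
    # ignored_layers, or a prefix extension of any ignored entry, so
    # ignoring a parent module also skips its submodules.
    return prefix in ignored_layers or any(
        prefix.startswith(ignored_layer) for ignored_layer in ignored_layers
    )

ignored = ["model.layers.0.mlp"]
print(matches_ignored("model.layers.0.mlp", ignored))            # True: exact match
print(matches_ignored("model.layers.0.mlp.gate_proj", ignored))  # True: new prefix match
print(matches_ignored("model.layers.1.mlp", ignored))            # False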