From b06c4f6adbb6cdffd3da1f42b89de3cc98731dc9 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Mon, 6 Oct 2025 20:49:36 +0800 Subject: [PATCH 1/2] fix ovis2.5 fp8 Signed-off-by: Isotr0py --- .../layers/quantization/utils/quant_utils.py | 9 +++++++-- vllm/model_executor/models/ovis2_5.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py index 2e9b279465f9..70c6ed6609ae 100644 --- a/vllm/model_executor/layers/quantization/utils/quant_utils.py +++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py @@ -302,7 +302,10 @@ def is_layer_skipped( is_skipped = None for shard_prefix in shard_prefixes: - is_shard_skipped = shard_prefix in ignored_layers + is_shard_skipped = shard_prefix in ignored_layers or any( + shard_prefix.startswith(ignored_layer) + for ignored_layer in ignored_layers + ) if is_skipped is None: is_skipped = is_shard_skipped @@ -321,7 +324,9 @@ def is_layer_skipped( ] ) else: - is_skipped = prefix in ignored_layers + is_skipped = prefix in ignored_layers or any( + prefix.startswith(ignored_layer) for ignored_layer in ignored_layers + ) assert is_skipped is not None return is_skipped diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py index 8f73f2ff8263..f5fcba4abc19 100644 --- a/vllm/model_executor/models/ovis2_5.py +++ b/vllm/model_executor/models/ovis2_5.py @@ -445,7 +445,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() config = vllm_config.model_config.hf_config quant_config = vllm_config.quant_config - + print(quant_config) self.config: PretrainedConfig = config self.llm = init_vllm_registered_model( vllm_config=vllm_config.with_hf_config(config.text_config), @@ -456,7 +456,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): config=config.vit_config, visual_vocab_size=config.visual_vocab_size, quant_config=quant_config, - prefix=f"{prefix}.visual_tokenizer", + prefix=maybe_prefix(prefix, "visual_tokenizer"), ) self.vte = VisualEmbedding(config.visual_vocab_size, config.hidden_size) From 884ba2e99bc3e26278956e1a8e4f88036b8780e1 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Mon, 6 Oct 2025 20:54:09 +0800 Subject: [PATCH 2/2] clean Signed-off-by: Isotr0py --- vllm/model_executor/layers/quantization/fp8.py | 9 +++++++++ vllm/model_executor/models/ovis2_5.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 2123fd9eba15..58737fa3ce0a 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -203,6 +203,15 @@ def __init__( ) self.weight_block_size = weight_block_size + def __repr__(self) -> str: + return ( + f"Fp8Config(" + f"is_checkpoint_fp8_serialized={self.is_checkpoint_fp8_serialized}, " + f"activation_scheme={self.activation_scheme}, " + f"ignored_layers={self.ignored_layers}, " + f"weight_block_size={self.weight_block_size})" + ) + @classmethod def get_name(cls) -> QuantizationMethods: return "fp8" diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py index f5fcba4abc19..8896b4942e47 100644 --- a/vllm/model_executor/models/ovis2_5.py +++ b/vllm/model_executor/models/ovis2_5.py @@ -445,7 +445,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() config = vllm_config.model_config.hf_config quant_config = vllm_config.quant_config - print(quant_config) + self.config: PretrainedConfig = config self.llm = init_vllm_registered_model( vllm_config=vllm_config.with_hf_config(config.text_config),