9 changes: 9 additions & 0 deletions vllm/model_executor/layers/quantization/fp8.py

```diff
@@ -203,6 +203,15 @@ def __init__(
         )
         self.weight_block_size = weight_block_size
 
+    def __repr__(self) -> str:
+        return (
+            f"Fp8Config("
+            f"is_checkpoint_fp8_serialized={self.is_checkpoint_fp8_serialized}, "
+            f"activation_scheme={self.activation_scheme}, "
+            f"ignored_layers={self.ignored_layers}, "
+            f"weight_block_size={self.weight_block_size})"
+        )
+
     @classmethod
     def get_name(cls) -> QuantizationMethods:
         return "fp8"
```

```diff
@@ -302,7 +302,10 @@ def is_layer_skipped(
 
         is_skipped = None
         for shard_prefix in shard_prefixes:
-            is_shard_skipped = shard_prefix in ignored_layers
+            is_shard_skipped = shard_prefix in ignored_layers or any(
+                shard_prefix.startswith(ignored_layer)
+                for ignored_layer in ignored_layers
+            )
```
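
To make the effect of the new check concrete, here is a small standalone sketch (the layer names and the ignored entry are invented for illustration; the real function additionally expands fused layers such as qkv_proj into per-shard prefixes before this check):

```python
ignored_layers = ["model.layers.0.mlp"]  # hypothetical modules_to_not_convert entry
shard_prefix = "model.layers.0.mlp.gate_proj"

# Old behavior: exact membership only, so this shard would still be quantized.
old_skip = shard_prefix in ignored_layers

# New behavior: an ignored entry may also be a dotted prefix of the shard name.
new_skip = shard_prefix in ignored_layers or any(
    shard_prefix.startswith(ignored_layer) for ignored_layer in ignored_layers
)

print(old_skip, new_skip)  # False True
```
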
Comment on lines +305 to +308 — Contributor (severity: high)

There is a potential issue here if `ignored_layers` contains an empty string: `shard_prefix.startswith("")` is always True, which would cause `is_shard_skipped` to be True for all layers, effectively disabling quantization for all fused layers. This could lead to silent failures and incorrect model behavior. It is safer to filter out empty strings from `ignored_layers` before checking `startswith`.

Suggested change:

```diff
-            is_shard_skipped = shard_prefix in ignored_layers or any(
-                shard_prefix.startswith(ignored_layer)
-                for ignored_layer in ignored_layers
-            )
+            is_shard_skipped = shard_prefix in ignored_layers or any(
+                shard_prefix.startswith(ignored_layer)
+                for ignored_layer in ignored_layers if ignored_layer
+            )
```
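
For reference, the pitfall described above is plain Python string behavior, independent of vLLM:

```python
# Every string starts with the empty string, so a stray "" entry in
# ignored_layers makes the any(...) clause true for every layer.
print("model.layers.0.mlp.gate_proj".startswith(""))                    # True
print(any("model.layers.0.mlp.gate_proj".startswith(p) for p in [""]))  # True
```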


```diff
 
             if is_skipped is None:
                 is_skipped = is_shard_skipped
@@ -321,7 +324,9 @@ def is_layer_skipped(
                 ]
             )
     else:
-        is_skipped = prefix in ignored_layers
+        is_skipped = prefix in ignored_layers or any(
+            prefix.startswith(ignored_layer) for ignored_layer in ignored_layers
+        )
```

Comment on lines +327 to +329 — Contributor (severity: high)

Similar to the change for fused layers, this logic is vulnerable to an empty string in `ignored_layers`. If `ignored_layers` contains `""`, `prefix.startswith("")` will always be True, causing all layers to be skipped from quantization. This can be prevented by ensuring `ignored_layer` is not an empty string before calling `startswith`.

Suggested change:

```diff
-        is_skipped = prefix in ignored_layers or any(
-            prefix.startswith(ignored_layer) for ignored_layer in ignored_layers
-        )
+        is_skipped = prefix in ignored_layers or any(
+            prefix.startswith(ignored_layer) for ignored_layer in ignored_layers if ignored_layer
+        )
```

Comment on lines 326 to +329 — P1: Handle partial module prefixes anywhere in layer path

The new `startswith` checks still only match when the ignored layer string is at the very beginning of the module name. In the case described in the commit message (e.g. `modules_to_not_convert` contains "lm_head" but the actual prefix seen by quantization is "llm.lm_head"), `prefix.startswith("lm_head")` returns False, so the layer is quantized even though it was supposed to be skipped. As a result, ms-swift fp8 checkpoints with truncated prefixes will still attempt to quantize modules like `llm.lm_head` and fail to load correctly. The comparison needs to match ignored entries anywhere in the qualified name (e.g. with `in`/`endswith`) rather than only at the start.
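
A minimal sketch of the kind of matching this comment asks for — not the PR's implementation: the helper name and the dotted-segment boundary rule are assumptions, chosen so that a truncated entry like "lm_head" matches "llm.lm_head" without also matching "llm.lm_head_extra":

```python
def matches_ignored(prefix: str, ignored_layers: list[str]) -> bool:
    """Hypothetical matcher: treat each non-empty ignored entry as a
    dot-separated fragment that may appear anywhere in the module path."""
    parts = prefix.split(".")
    for ignored in ignored_layers:
        if not ignored:  # guard against the empty-string pitfall noted above
            continue
        ignored_parts = ignored.split(".")
        n = len(ignored_parts)
        # Slide over the qualified name and compare whole dotted segments.
        if any(parts[i:i + n] == ignored_parts for i in range(len(parts) - n + 1)):
            return True
    return False

print(matches_ignored("llm.lm_head", ["lm_head"]))                   # True
print(matches_ignored("llm.lm_head_extra", ["lm_head"]))             # False
print(matches_ignored("model.layers.0.mlp.gate_proj", ["lm_head"]))  # False
```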


```diff
 
     assert is_skipped is not None
     return is_skipped
```
2 changes: 1 addition & 1 deletion vllm/model_executor/models/ovis2_5.py

```diff
@@ -456,7 +456,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             config=config.vit_config,
             visual_vocab_size=config.visual_vocab_size,
             quant_config=quant_config,
-            prefix=f"{prefix}.visual_tokenizer",
+            prefix=maybe_prefix(prefix, "visual_tokenizer"),
         )
 
         self.vte = VisualEmbedding(config.visual_vocab_size, config.hidden_size)
```
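
For context, a sketch of why `maybe_prefix` is preferable to raw f-string joining here, assuming the helper joins with a dot only when the parent prefix is non-empty (the actual vLLM implementation may differ in details):

```python
def maybe_prefix(prefix: str, name: str) -> str:
    # Assumed behavior: avoid a dangling leading dot for top-level modules.
    return f"{prefix}.{name}" if prefix else name

prefix = ""  # e.g. when the model itself is the top-level module
print(f"{prefix}.visual_tokenizer")              # ".visual_tokenizer" (leading dot)
print(maybe_prefix(prefix, "visual_tokenizer"))  # "visual_tokenizer"
```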