-
Notifications
You must be signed in to change notification settings - Fork 76
[convert] Support for DeepSeek-V3.2 and Dequantizing #641
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
40a3b28
e334817
dfbcf30
4c39a6f
10039f3
3075189
5a1ca69
58d0230
f5de237
5a64346
712f9ce
c65eef1
66cdd0f
0ba80e3
96125d6
cb9c123
88b2691
6a2c622
d7590a6
278f177
b852c3c
fea715e
d3ddadf
60ac422
33deb5d
934abd2
8c673b8
40e2430
4d72ba7
cbf7470
fd53387
783a281
acabb1f
65a0e10
da6d332
0b85709
a1ab6c8
2107988
cc8cf45
98681fe
035a911
d9f99ce
58ecae1
b03f20c
55d0560
98764b3
878fa9b
6961d51
27ce7aa
63ab5da
4c86202
489521e
15c2c7f
4cd6962
991e330
71d4b2b
2ad62f6
bbc4427
69ebd26
f062fde
76b2163
8283978
29c9888
87398cc
86588d3
9693b45
8069130
8a21aa3
182b8dc
0ae156b
d184f48
886d08d
4c74306
ba33789
a3247fe
320ce06
9fbce9f
b6f516f
2de9cd7
acc01d0
e57a2c1
70343c5
44e89f2
47c9e36
129a9fe
23af737
4ed0bc1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
|
|
||
| from compressed_tensors.entrypoints.convert import ( | ||
| convert_checkpoint, | ||
| FP8BlockDequantizer, | ||
| ) | ||
|
|
||
| # deepseek-ai/DeepSeek-V3.2 checkpoint has layers that are quantized in the FP8 | ||
| # quant method's FP8_BLOCK scheme. This script will upconvert to bfloat16 so that | ||
| # the model can be compressed in another configuration. | ||
| MODEL_ID = "deepseek-ai/DeepSeek-V3.2" | ||
| SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1] + "-bf16" | ||
|
|
||
| # Convert DeepSeek-V3.2 back to dense bfloat16 format | ||
| convert_checkpoint( | ||
| model_stub=MODEL_ID, | ||
| save_directory=SAVE_DIR, | ||
| converter=FP8BlockDequantizer( | ||
| # `deepseek-ai/DeepSeek-V3.2` fp8-block-quantized layers, found by inspection | ||
| targets=[ | ||
| r"re:.*mlp.*\.(gate_up|gate|up|down)_proj$", | ||
| r"re:.*self_attn.*\.(kv_b|o|q_a|q_b)_proj$", | ||
| r"re:.*self_attn.kv_a_proj_with_mqa$", | ||
| r"re:.*self_attn.indexer.(wk|wq_b)$", | ||
| ], | ||
| ), | ||
| max_workers=4, | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
|
|
||
| from compressed_tensors.entrypoints.convert import ( | ||
| convert_checkpoint, | ||
| FP8BlockDequantizer, | ||
| ) | ||
|
|
||
| MODEL_ID = "qwen-community/Qwen3-4B-FP8" | ||
| SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1].rstrip("-FP8") | ||
|
|
||
| # Convert Qwen3-4B-FP8 back to dense bfloat16 format | ||
| convert_checkpoint( | ||
| model_stub=MODEL_ID, | ||
| save_directory=SAVE_DIR, | ||
| converter=FP8BlockDequantizer( | ||
| # qwen-community/Qwen3-4B-FP8's fp8-block-quantized layers, found by inspection | ||
| targets=[ | ||
| r"re:.*mlp.*\.(gate_up|gate|up|down)_proj$", | ||
| r"re:.*self_attn.*\.(q|k|v|o)_proj$", | ||
| ], | ||
| weight_block_size=[128, 128], | ||
| ), | ||
| max_workers=8, | ||
| ) |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -7,9 +7,13 @@ | |||||
| from compressed_tensors import __version__ as ct_version | ||||||
| from compressed_tensors.base import COMPRESSION_VERSION_NAME, QUANTIZATION_CONFIG_NAME | ||||||
| from compressed_tensors.entrypoints.convert import Converter | ||||||
| from compressed_tensors.utils.safetensors_load import find_config_path | ||||||
| from compressed_tensors.utils.safetensors_load import ( | ||||||
| InverseWeightMap, | ||||||
| find_config_path, | ||||||
| load_tensors_from_inverse_weight_map, | ||||||
| ) | ||||||
| from loguru import logger | ||||||
| from safetensors.torch import load_file, save_file | ||||||
| from safetensors.torch import save_file | ||||||
|
|
||||||
|
|
||||||
| __all__ = [ | ||||||
|
|
@@ -34,17 +38,20 @@ def write_checkpoint_quantization_config( | |||||
| :param converter: Converter instance whose create_config() produces the | ||||||
| updated quantization config | ||||||
| """ | ||||||
| quant_config = converter.create_config() | ||||||
|
|
||||||
| quant_config_data = quant_config.model_dump() | ||||||
| quant_config_data[COMPRESSION_VERSION_NAME] = ct_version | ||||||
| quant_config_data = None | ||||||
| if (quant_config := converter.create_config()) is not None: | ||||||
| quant_config_data = quant_config.model_dump() | ||||||
| quant_config_data[COMPRESSION_VERSION_NAME] = ct_version | ||||||
|
|
||||||
| config_file_path = find_config_path(save_directory) | ||||||
| if config_file_path is not None: | ||||||
| with open(config_file_path, "r") as file: | ||||||
| config_data = json.load(file) | ||||||
|
|
||||||
| config_data[QUANTIZATION_CONFIG_NAME] = quant_config_data | ||||||
| if quant_config_data is None: | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. qconfig field is not guaranteed to exist.
Suggested change
|
||||||
| del config_data[QUANTIZATION_CONFIG_NAME] | ||||||
| else: | ||||||
| config_data[QUANTIZATION_CONFIG_NAME] = quant_config_data | ||||||
|
|
||||||
| with open(config_file_path, "w") as file: | ||||||
| json.dump(config_data, file, indent=2, sort_keys=True) | ||||||
|
|
@@ -57,35 +64,45 @@ def write_checkpoint_quantization_config( | |||||
|
|
||||||
|
|
||||||
| def validate_file( | ||||||
| file_path: str | os.PathLike, | ||||||
| inverse_weight_map: InverseWeightMap, | ||||||
| converter: Converter, | ||||||
| ): | ||||||
| """ | ||||||
| Validate that each quantizable tensor in a safetensors file can be quantized. | ||||||
|
|
||||||
| :param file_path: safetensors file to validate | ||||||
| :param inverse_weight_map: mapping of resolved source file path -> | ||||||
| list of tensor names to load from that file. Precomputed by | ||||||
| build_inverse_weight_map() in the job-building phase. | ||||||
| Example: {"/path/shard0.safetensors": ["q_proj.weight"], | ||||||
| "/path/shard1.safetensors": ["k_proj.weight", "v_proj.weight"]} | ||||||
| :param converter: converter we wish to apply to the checkpoint, | ||||||
| e.g. conversion of some layers from some format to compressed-tensors | ||||||
| """ | ||||||
| tensors = load_file(file_path) | ||||||
| tensors = load_tensors_from_inverse_weight_map(inverse_weight_map) | ||||||
|
|
||||||
| converter.validate(tensors) | ||||||
|
|
||||||
|
|
||||||
| def convert_file( | ||||||
| file_path: str | os.PathLike, | ||||||
| inverse_weight_map: InverseWeightMap, | ||||||
| save_path: str | os.PathLike, | ||||||
| converter: Converter, | ||||||
| ) -> tuple[int, dict[str, str]]: | ||||||
| """ | ||||||
| Convert tensors in a given safetensors file | ||||||
|
|
||||||
| :param file_path: safetensors file to process | ||||||
| :param inverse_weight_map: mapping of resolved source file path -> | ||||||
| list of tensor names to load from that file. Precomputed by | ||||||
| build_inverse_weight_map() in the job-building phase. | ||||||
| Example: {"/path/shard0.safetensors": ["q_proj.weight"], | ||||||
| "/path/shard1.safetensors": ["k_proj.weight", "v_proj.weight"]} | ||||||
| :param save_path: save path of file with quantized weights | ||||||
| :param converter: converter we wish to apply to the checkpoint, | ||||||
| e.g. conversion of some layers from some format to compressed-tensors | ||||||
| :returns: tuple of (total_size, weight_map), respectively the total size in bytes | ||||||
| of the saved file and dictionary of weight name -> save path | ||||||
| """ | ||||||
| tensors = load_file(file_path) | ||||||
| tensors = load_tensors_from_inverse_weight_map(inverse_weight_map) | ||||||
|
|
||||||
| converter.process(tensors) | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,3 +6,4 @@ | |
|
|
||
| from .base import * | ||
| from .modelopt_nvfp4 import * | ||
| from .fp8block_dequantizer import * | ||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,11 +3,15 @@ | |||||||||||
|
|
||||||||||||
| from __future__ import annotations | ||||||||||||
|
|
||||||||||||
| from collections import defaultdict | ||||||||||||
| from typing import TYPE_CHECKING, Protocol | ||||||||||||
|
|
||||||||||||
| import torch | ||||||||||||
| from compressed_tensors.utils.safetensors_load import InverseWeightMap | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| __all__ = ["Converter", "build_inverse_weight_maps"] | ||||||||||||
|
|
||||||||||||
| if TYPE_CHECKING: | ||||||||||||
| from compressed_tensors.quantization import QuantizationConfig | ||||||||||||
|
|
||||||||||||
|
|
@@ -42,9 +46,100 @@ def validate(self, tensors: dict[str, torch.Tensor]): | |||||||||||
| """ | ||||||||||||
| pass | ||||||||||||
|
|
||||||||||||
| def create_config(self) -> QuantizationConfig: | ||||||||||||
| def create_config(self) -> QuantizationConfig | None: | ||||||||||||
| """ | ||||||||||||
| Create compressed-tensors QuantizationConfig so that it can be set in the | ||||||||||||
| new model checkpoint's config.json. | ||||||||||||
| If the converter is moving checkpoint to full-precision, have this function | ||||||||||||
| return None, and quantization_config will be removed from config.json | ||||||||||||
|
Comment on lines
+53
to
+54
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| """ | ||||||||||||
| pass | ||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make this an abstract method to force implementers to make a decision here.
Suggested change
|
||||||||||||
|
|
||||||||||||
| def get_dependencies(self, weight_name: str) -> dict[str, bool]: | ||||||||||||
| """ | ||||||||||||
| Given a weight name, return a dictionary of all dependency weight names, so that | ||||||||||||
| weights can be processed correctly and in a parallelized fashion. | ||||||||||||
| If a dependency is optional, the value associated with the key should be False. | ||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please give an example of an "optional dependency" to make the concept clear. |
||||||||||||
| If the value is True, it is assumed the weight is required and will error out | ||||||||||||
| during the job build phase if not found. | ||||||||||||
| If there are no dependencies, an empty dict should be returned. | ||||||||||||
|
|
||||||||||||
| :returns: dict[str, bool] {dependency weight name -> whether it is required} | ||||||||||||
| """ | ||||||||||||
| pass | ||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Either make this an abstract method or return an empty dict. |
||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| def build_inverse_weight_maps( | ||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This probably goes in a utils file, not the base class file. |
||||||||||||
| weight_map: dict[str, str], | ||||||||||||
| model_files: dict[str, str], | ||||||||||||
| converters: list[Converter], | ||||||||||||
| ) -> dict[str, InverseWeightMap]: | ||||||||||||
| """ | ||||||||||||
| For a given output shard, precompute exactly which tensors to load from | ||||||||||||
| which source files — including required partner tensors from other shards. | ||||||||||||
|
|
||||||||||||
| This is necessary because some converters require that a set of tensors are | ||||||||||||
| accessible in order for them to be processed correctly. | ||||||||||||
|
|
||||||||||||
| :param shard_name: the shard filename this job will process and save | ||||||||||||
| :param weight_map: tensor name -> shard filename (from safetensors.index.json) | ||||||||||||
| :param model_files: shard filename -> resolved absolute path | ||||||||||||
| :return: {resolved_file_path: [tensor_names_to_load]} | ||||||||||||
| """ | ||||||||||||
|
|
||||||||||||
| def get_dependencies_recursive( | ||||||||||||
| weight_name: str, converters: list[Converter], current_deps: dict[str, bool] | ||||||||||||
| ) -> dict[str, bool]: | ||||||||||||
| for converter in converters: | ||||||||||||
| for dep, is_required in converter.get_dependencies(weight_name).items(): | ||||||||||||
| if dep not in current_deps: | ||||||||||||
| current_deps[dep] = is_required | ||||||||||||
| get_dependencies_recursive(dep, converters, current_deps) | ||||||||||||
|
|
||||||||||||
| return current_deps | ||||||||||||
|
|
||||||||||||
| # map of weight name -> ( map of dependency name -> is_required ) | ||||||||||||
| weight_deps_dict: dict[str, set[str]] = defaultdict(set) | ||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| for weight_name, weight_shard_name in weight_map.items(): | ||||||||||||
| weight_deps_dict[weight_name] = get_dependencies_recursive( | ||||||||||||
| weight_name, converters, {} | ||||||||||||
| ) | ||||||||||||
| assert ( | ||||||||||||
| weight_name not in weight_deps_dict[weight_name] | ||||||||||||
| ), f"{weight_name} found in dependencies {weight_deps_dict[weight_name]}" | ||||||||||||
|
|
||||||||||||
| # set of all dependencies (i.e. all weight names required by another) | ||||||||||||
| all_dependencies: set[str] = set() | ||||||||||||
| for values in weight_deps_dict.values(): | ||||||||||||
| for value in values: | ||||||||||||
| all_dependencies.add(value) | ||||||||||||
|
Comment on lines
+112
to
+115
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
|
|
||||||||||||
| inverse_weight_maps: dict[str, InverseWeightMap] = defaultdict( | ||||||||||||
| lambda: defaultdict(list) | ||||||||||||
| ) | ||||||||||||
| for weight_name, weight_shard_name in weight_map.items(): | ||||||||||||
| if weight_name in all_dependencies: | ||||||||||||
| # weight is a partner to some other primary tensor, skip it | ||||||||||||
| continue | ||||||||||||
|
|
||||||||||||
| # weight is purely a primary weight, is not a dependency of anything | ||||||||||||
| # add it and all its dependencies | ||||||||||||
| inverse_weight_map: InverseWeightMap = inverse_weight_maps[weight_shard_name] | ||||||||||||
| dependency_weights = weight_deps_dict[weight_name] | ||||||||||||
| for weight_to_add_name, is_required in [ | ||||||||||||
| (weight_name, True), | ||||||||||||
| *dependency_weights.items(), | ||||||||||||
| ]: | ||||||||||||
| if weight_to_add_name not in weight_map: | ||||||||||||
| if is_required: | ||||||||||||
| raise ValueError( | ||||||||||||
| f"Required weight {weight_to_add_name} not found in weight map" | ||||||||||||
| ) | ||||||||||||
| else: | ||||||||||||
| continue | ||||||||||||
| weight_to_add_shard_name = weight_map[weight_to_add_name] | ||||||||||||
| resolved_path = model_files.get(weight_to_add_shard_name) | ||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| inverse_weight_map[resolved_path].append(weight_to_add_name) | ||||||||||||
|
|
||||||||||||
| # return dicts, not defaultdicts, to avoid silent errors | ||||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ty |
||||||||||||
| return {k: dict(v) for k, v in inverse_weight_maps.items()} | ||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
This will error if there is no index file. Use something like this instead