-
Notifications
You must be signed in to change notification settings - Fork 453
Expand file tree
/
Copy pathtest_convert_checkpoint.py
More file actions
106 lines (93 loc) · 3.3 KB
/
test_convert_checkpoint.py
File metadata and controls
106 lines (93 loc) · 3.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import json
import pytest
from compressed_tensors.entrypoints.convert import (
ModelOptNvfp4Converter,
convert_checkpoint,
)
from compressed_tensors.quantization import (
QuantizationArgs,
QuantizationConfig,
QuantizationType,
)
from compressed_tensors.quantization.quant_scheme import NVFP4
from tests.testing_utils import requires_cadence
# NOTE: This file contains tests for compressed_tensors.entrypoints.convert
# that are either long-running or involve larger models. They have been placed
# here to leverage llm-compressor's nightly testing CI/CD.
@requires_cadence("nightly")
def test_convert_checkpoint(tmp_path):
    """
    Exercise the compressed-tensors convert_checkpoint entrypoint against a
    pre-existing modelopt NVFP4 checkpoint: mismatched converter settings
    must be rejected, and a correct conversion must emit a valid
    compressed-tensors config plus only recognized tensor names.
    """
    MODEL_ID = "nvidia/Qwen3-8B-NVFP4"
    output_dir = tmp_path / "convert_out"

    # Target patterns matching the checkpoint's quantized modules.
    valid_targets = [
        r"re:.*mlp.*\.(gate_up|gate|up|down)_proj$",
        r"re:.*self_attn.*\.(q|k|v|o)_proj$",
    ]
    # Deliberately omits v_proj, so conversion should fail.
    invalid_targets = [
        r"re:.*mlp.*\.(gate_up|gate|up|down)_proj$",
        r"re:.*self_attn.*\.(q|k|o)_proj$",
    ]
    valid_kv_scheme = QuantizationArgs(
        num_bits=8, dynamic=False, type=QuantizationType.FLOAT
    )
    invalid_kv_scheme = None

    # Correct targets but a missing kv-cache scheme must be rejected.
    with pytest.raises(ValueError):
        convert_checkpoint(
            model_stub=MODEL_ID,
            save_directory=output_dir,
            converter=ModelOptNvfp4Converter(
                targets=valid_targets,
                kv_cache_scheme=invalid_kv_scheme,
            ),
        )

    # Correct kv-cache scheme but incomplete targets must be rejected.
    with pytest.raises(ValueError):
        convert_checkpoint(
            model_stub=MODEL_ID,
            save_directory=output_dir,
            converter=ModelOptNvfp4Converter(
                targets=invalid_targets,
                kv_cache_scheme=valid_kv_scheme,
            ),
        )

    # A fully correct converter configuration should succeed.
    convert_checkpoint(
        model_stub=MODEL_ID,
        save_directory=output_dir,
        converter=ModelOptNvfp4Converter(
            targets=valid_targets,
            kv_cache_scheme=valid_kv_scheme,
        ),
    )

    # The written config must round-trip through QuantizationConfig.
    with open(output_dir / "config.json", "r") as config_file:
        config = json.load(config_file)
    qconfig = QuantizationConfig.model_validate(config["quantization_config"])
    assert qconfig.format == "nvfp4-pack-quantized"
    assert qconfig.quant_method == "compressed-tensors"
    assert len(qconfig.config_groups) == 1

    # Weights and input_activations must be a superset of the NVFP4 preset.
    group = qconfig.config_groups["config_group_0"]
    assert (
        group.weights.model_dump().items()
        >= NVFP4["weights"].model_dump().items()
    )
    assert (
        group.input_activations.model_dump().items()
        >= NVFP4["input_activations"].model_dump().items()
    )

    # Every serialized tensor name must end in one of the known suffixes;
    # str.endswith accepts a tuple of candidates directly.
    allowed_suffixes = (
        "weight",
        "weight_scale",
        "weight_packed",
        "weight_global_scale",
        "input_global_scale",
        "k_scale",
        "v_scale",
    )
    with open(output_dir / "model.safetensors.index.json", "r") as index_file:
        weight_map = json.load(index_file)["weight_map"]
    for key in weight_map:
        assert key.endswith(allowed_suffixes), f"Unexpected key found: {key}"