Skip to content

Commit 9120a98

Browse files
[Compressors] Remove sparse compression (#2452)
## Purpose ##
* Remove sparsity tests which fail now that sparse compression is no longer supported

## Corequisites ##
* vllm-project/compressed-tensors#610

## Testing ##
* https://github.com/neuralmagic/llm-compressor-testing/actions/runs/23027021444/job/66877226415

---------

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
Co-authored-by: Brian Dellabetta <brian-dellabetta@users.noreply.github.com>
1 parent 353f556 commit 9120a98

File tree

10 files changed

+57
-610
lines changed

10 files changed

+57
-610
lines changed

src/llmcompressor/entrypoints/model_free/lifecycle.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
import torch
2-
from compressed_tensors.compressors import BaseCompressor
3-
from compressed_tensors.config.format import _get_quant_compression_format
42
from compressed_tensors.quantization import (
53
QuantizationScheme,
64
initialize_module_for_quantization,
@@ -20,7 +18,6 @@
2018
"validate_weight_for_quantization",
2119
"calibrate_global_scale",
2220
"calibrate_scale_zp",
23-
"compress_module",
2421
]
2522

2623

@@ -64,29 +61,3 @@ def calibrate_scale_zp(module: torch.nn.Linear):
6461
apply_calibration_status(module)
6562
update_weight_zp_scale(module)
6663
freeze_module_quantization(module)
67-
68-
69-
def compress_module(module: torch.nn.Linear):
70-
scheme: QuantizationScheme = getattr(module, "quantization_scheme")
71-
72-
format = _get_quant_compression_format(scheme.input_activations, scheme.weights)
73-
scheme.format = format.value
74-
75-
compressor = BaseCompressor.load_from_registry(format.value)
76-
data = compressor.compress_weight(
77-
module.weight,
78-
quantization_args=scheme.weights,
79-
scale=getattr(module, "weight_scale"),
80-
zero_point=getattr(module, "weight_zero_point", None),
81-
global_scale=getattr(module, "weight_global_scale", None),
82-
)
83-
84-
# `compress_weight` is a messy api
85-
delattr(module, "weight")
86-
for key, value in data.items():
87-
if hasattr(module, key):
88-
getattr(module, key).data = value
89-
else:
90-
module.register_parameter(
91-
key, torch.nn.Parameter(value, requires_grad=False)
92-
)

src/llmcompressor/entrypoints/model_free/process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Iterable
44

55
import torch
6+
from compressed_tensors.compressors import compress_module
67
from compressed_tensors.entrypoints.convert import Converter
78
from compressed_tensors.quantization import QuantizationScheme
89
from compressed_tensors.utils import match_quantizable_tensors
@@ -12,7 +13,6 @@
1213
from llmcompressor.entrypoints.model_free.lifecycle import (
1314
calibrate_global_scale,
1415
calibrate_scale_zp,
15-
compress_module,
1616
initialize_quantized_linear,
1717
validate_weight_for_quantization,
1818
)

src/llmcompressor/transformers/compression/compressed_tensors_utils.py

Lines changed: 7 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from functools import wraps
44

55
import torch
6-
from accelerate.accelerator import get_state_dict_offloaded_model
76
from compressed_tensors import (
87
ModelCompressor,
98
SparsityCompressionConfig,
@@ -15,9 +14,6 @@
1514

1615
from llmcompressor.core import active_session
1716
from llmcompressor.pytorch.model_load.helpers import copy_python_files_from_model_cache
18-
from llmcompressor.transformers.compression.sparsity_metadata_config import (
19-
SparsityConfigMetadata,
20-
)
2117
from llmcompressor.transformers.utils import RECIPE_FILE_NAME
2218
from llmcompressor.transformers.utils.helpers import infer_recipe_from_model_path
2319

@@ -143,59 +139,14 @@ def get_model_compressor(
143139
:param disable_sparse_compression: bool to skip sparse compression
144140
"""
145141

146-
if sparsity_config is None:
147-
"""
148-
Case 1: No sparsity config is provided
149-
1. Will either skip sparsity compression
150-
2. Or we will infer sparsity from the model directly
151-
152-
Check recipe for applied sparsity:
153-
- Set skip_sparsity_compression_stats to False if don't find a
154-
sparsity structure from the recipe
155-
- If we identify sparsity based on the recipe or the user
156-
set skip_sparsity_compression_stats to False, generate config
157-
"""
158-
sparsity_structure = SparsityConfigMetadata.infer_sparsity_structure(
159-
model, check_only_modifiers=True
142+
if (
143+
sparsity_config is not None
144+
or not skip_sparsity_compression_stats
145+
or disable_sparse_compression
146+
):
147+
logger.warning(
148+
"Sparse compression is no longer supported by compressed-tensors"
160149
)
161-
if sparsity_structure is not None:
162-
skip_sparsity_compression_stats = False
163-
164-
if skip_sparsity_compression_stats:
165-
logger.info(
166-
"skip_sparsity_compression_stats set to True. Skipping sparsity "
167-
"compression statistic calculations. No sparsity compressor will "
168-
"be applied."
169-
)
170-
sparsity_config = None
171-
else:
172-
state_dict = get_state_dict_offloaded_model(model)
173-
174-
sparsity_config = SparsityConfigMetadata.from_pretrained(
175-
model,
176-
state_dict=state_dict,
177-
compress=save_compressed,
178-
quantization_format=quantization_format,
179-
disable_sparse_compression=disable_sparse_compression,
180-
sparsity_structure=sparsity_structure,
181-
)
182-
else:
183-
"""
184-
# Case 2: User provides a Sparsity Config
185-
- This is the case when there is existing sparsity in the
186-
model that we'd like to account for while compressing
187-
- Users should provide a SparsityConfig, conveying the model's
188-
sparsity structure when saving the model
189-
"""
190-
if sparsity_config.sparsity_structure is None:
191-
logger.info(
192-
"SparsityConfigMetadata provided without indicating ",
193-
"the sparsity structure. Sparisty will be inferred from the model. "
194-
"Consider providing the structure to skip this step ",
195-
)
196-
sparsity_config.sparsity_structure = (
197-
SparsityConfigMetadata.infer_sparsity_structure(model)
198-
)
199150

200151
if not save_compressed:
201152
if quantization_format not in (None, CompressionFormat.dense.value):
@@ -209,7 +160,6 @@ def get_model_compressor(
209160

210161
return ModelCompressor.from_pretrained_model(
211162
model,
212-
sparsity_config_or_format=sparsity_config,
213163
quantization_format=quantization_format,
214164
)
215165

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
cadence: "commit"
22
test_type: "regression"
3-
compressed_model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
4-
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
3+
compressed_model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
cadence: "nightly"
22
test_type: "regression"
3-
compressed_model_stub: "nm-testing/tinyllama-w4a16-compressed"
4-
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
3+
compressed_model_stub: "nm-testing/tinyllama-w4a16-compressed"
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
cadence: "nightly"
22
test_type: "regression"
3-
compressed_model_stub: "nm-testing/tinyllama-w8a16-dense"
4-
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
3+
compressed_model_stub: "nm-testing/tinyllama-w8a16-dense"
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
cadence: "commit"
22
test_type: "regression"
3-
compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"
4-
skeleton_model_stub: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
3+
compressed_model_stub: "nm-testing/tinyllama-w8a8-compressed"

0 commit comments

Comments (0)