diff --git a/setup.py b/setup.py index 3a31361f1e..31453db9a5 100644 --- a/setup.py +++ b/setup.py @@ -122,11 +122,7 @@ def localversion_func(version: ScmVersion) -> str: ), ("tqdm>=4.66.3,<=4.67.1" if BUILD_TYPE == "release" else "tqdm>=4.66.3"), ("torch>=2.9.0,<=2.10.0" if BUILD_TYPE == "release" else "torch>=2.9.0"), - ( - "transformers>=4.56.1,<=4.57.6" - if BUILD_TYPE == "release" - else "transformers>=4.56.1,<=4.57.6" - ), + "transformers>=4.56.1", ("datasets>=4.0.0,<=4.4.1" if BUILD_TYPE == "release" else "datasets>=4.0.0"), ( # auto-round 0.9.1 cannot work with accelerate <1.10.0 @@ -148,7 +144,7 @@ def localversion_func(version: ScmVersion) -> str: ( "compressed-tensors==0.13.0" if BUILD_TYPE == "release" - else "compressed-tensors>=0.13.1a2" + else "compressed-tensors==0.14.1a20260225" ), ], extras_require={ diff --git a/src/llmcompressor/modifiers/transform/smoothquant/base.py b/src/llmcompressor/modifiers/transform/smoothquant/base.py index 1f1da49ea1..5fdaa318ee 100644 --- a/src/llmcompressor/modifiers/transform/smoothquant/base.py +++ b/src/llmcompressor/modifiers/transform/smoothquant/base.py @@ -212,11 +212,8 @@ def _resolve_mappings(self, model: Module) -> list[SmoothQuantMapping]: ) for mapping in self.mappings: - # we deliberately don't use the ignore list when matching mappings - # so that we can handle layers that need smoothing but not all operations - # we only skip if no layers in mapping would be smoothed. for *nested_balance_layers, smooth_layers in match_modules_set( - model, tree_leaves(mapping) + model, tree_leaves(mapping), ignore=self.ignore ): if len(smooth_layers) > 1: raise ValueError( diff --git a/src/llmcompressor/transformers/compression/compressed_tensors_utils.py b/src/llmcompressor/transformers/compression/compressed_tensors_utils.py index f04db96a13..83f1852d40 100644 --- a/src/llmcompressor/transformers/compression/compressed_tensors_utils.py +++ b/src/llmcompressor/transformers/compression/compressed_tensors_utils.py @@ -93,6 +93,16 @@ def save_pretrained_wrapper( # convert to accelerate offloaded for optimal saving with transformers to_accelerate(model) + # Remove hf_device_map before saving to avoid a bug where + # transformers builds module_map with HF model keys but then applies + # revert_weight_conversion (which renames keys to checkpoint format) + # causing a KeyError when looking up renamed keys in module_map. + # After to_accelerate(), all parameters are real tensors (not meta, + # because init_hook is skipped), so model.state_dict() can be used + # directly without the module_map offload path. + if hasattr(model, "hf_device_map"): + delattr(model, "hf_device_map") + # save (compressed) model structure original_save_pretrained.__get__(model, model_class)( save_directory, diff --git a/src/llmcompressor/utils/dev.py b/src/llmcompressor/utils/dev.py index 244cd1489a..6b66c796fa 100644 --- a/src/llmcompressor/utils/dev.py +++ b/src/llmcompressor/utils/dev.py @@ -12,7 +12,10 @@ from loguru import logger from safetensors.torch import save_file from transformers import AutoModelForCausalLM, PreTrainedModel -from transformers.modeling_utils import TORCH_INIT_FUNCTIONS +try: + from transformers.modeling_utils import TORCH_INIT_FUNCTIONS +except ImportError: + from transformers.initialization import TORCH_INIT_FUNCTIONS from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, WEIGHTS_INDEX_NAME __all__ = [