Error when configs are created with unrecognized fields #386

Merged: 7 commits, Aug 11, 2025. Changes shown from 4 commits.
@@ -393,7 +393,7 @@ def compress_model(self, model: Module):

if prefix in module_to_scheme or prefix in sparse_compression_targets:
module_device = get_execution_device(module).type
- is_meta = (module_device == "meta")
+ is_meta = module_device == "meta"

exec_device = "meta" if is_meta else "cpu"
onloading_device = "meta" if is_meta else module_device
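For context on the device handling above: modules already on the meta device stay on `meta`, while real modules are compressed on CPU and then onloaded back to their original device. A minimal sketch of that selection logic, using `next(module.parameters()).device` as a stand-in for the library's `get_execution_device` helper:

```python
import torch

# Hypothetical module materialized on the meta device (no real storage)
module = torch.nn.Linear(8, 8, device="meta")

# Stand-in for get_execution_device(module).type
module_device = next(module.parameters()).device.type
is_meta = module_device == "meta"

# Meta modules are handled on "meta"; everything else is compressed on CPU
# and onloaded back to its original device afterwards
exec_device = "meta" if is_meta else "cpu"
onloading_device = "meta" if is_meta else module_device
print(exec_device, onloading_device)  # meta meta
```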
@@ -178,9 +178,13 @@ def sparse24_bitmask_compress(

if tensor.is_meta:
num_rows, num_cols = tensor.shape
- compressed_values = torch.empty((num_rows, num_cols // 2), dtype=tensor.dtype, device="meta")
+ compressed_values = torch.empty(
+     (num_rows, num_cols // 2), dtype=tensor.dtype, device="meta"
+ )
packed_cols = (num_cols + 7) // 8
- bitmasks_packed = torch.empty((num_rows, packed_cols), dtype=torch.uint8, device="meta")
+ bitmasks_packed = torch.empty(
+     (num_rows, packed_cols), dtype=torch.uint8, device="meta"
+ )
return compressed_values, bitmasks_packed

bytemasks = get_24_bytemasks(tensor=tensor)
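As a rough illustration of the shape math in this hunk (sizes are arbitrary, not from the PR): 2:4 sparsity keeps half of the values in each row, and the bitmask packs eight column bits per byte, so on the meta device only shapes and dtypes need to be carried:

```python
import torch

num_rows, num_cols = 16, 64  # example dimensions

# Half the columns survive 2:4 pruning
compressed_values = torch.empty(
    (num_rows, num_cols // 2), dtype=torch.bfloat16, device="meta"
)

# One bit per column, packed 8 columns per uint8 byte (rounded up)
packed_cols = (num_cols + 7) // 8
bitmasks_packed = torch.empty(
    (num_rows, packed_cols), dtype=torch.uint8, device="meta"
)

print(compressed_values.shape, bitmasks_packed.shape)
# torch.Size([16, 32]) torch.Size([16, 8])
```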
7 changes: 6 additions & 1 deletion src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -189,7 +189,12 @@ def _initialize_scale_zero_point(
else:
# TODO: consider erroring out in the future as if the dtype if not one of these,
# there is likely bug
- if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32, torch.float64]:
+ if scale_dtype not in [
+     torch.float16,
+     torch.bfloat16,
+     torch.float32,
+     torch.float64,
+ ]:
scale_dtype = torch.float16
zp_dtype = quantization_args.pytorch_dtype()

3 changes: 2 additions & 1 deletion src/compressed_tensors/quantization/quant_args.py
@@ -19,7 +19,7 @@
import torch
from compressed_tensors.utils import Aliasable
from compressed_tensors.utils.helpers import deprecated
- from pydantic import BaseModel, Field, field_validator, model_validator
+ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator


__all__ = [
@@ -186,6 +186,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
"Observers constructor excluding quantization range or symmetry"
),
)
+ model_config = ConfigDict(extra="forbid")

@field_validator("type", mode="before")
def validate_type(cls, value) -> QuantizationType:
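With `extra="forbid"` set on `QuantizationArgs`, an unrecognized field now raises a `ValidationError` instead of being silently ignored. A minimal sketch, assuming `compressed-tensors` with this change is installed:

```python
from pydantic import ValidationError

from compressed_tensors.quantization import QuantizationArgs

# A misspelled field used to be dropped silently; now it fails loudly
try:
    QuantizationArgs(num_bits=8, symetric=True)  # typo: should be `symmetric`
except ValidationError as err:
    print(err)  # pydantic reports `symetric` as an extra input that is not permitted
```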
8 changes: 6 additions & 2 deletions src/compressed_tensors/quantization/quant_config.py
@@ -13,7 +13,7 @@
# limitations under the License.

from enum import Enum
- from typing import Dict, List, Optional, Union
+ from typing import Annotated, Any, Dict, List, Optional, Union

from compressed_tensors.config import CompressionFormat
from compressed_tensors.quantization.quant_args import DynamicType, QuantizationArgs
@@ -26,7 +26,7 @@
module_type,
parse_out_kv_cache_args,
)
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, ConfigDict, Field
from torch.nn import Module


@@ -142,6 +142,10 @@ class QuantizationConfig(BaseModel):
quantization_status: QuantizationStatus = QuantizationStatus.INITIALIZED
global_compression_ratio: Optional[float] = None
ignore: Optional[List[str]] = Field(default_factory=list)
+ # `run_compressed` is a dummy, unused arg for backwards compatibility
+ # see: https://github.com/huggingface/transformers/pull/39324
+ run_compressed: Annotated[Any, Field(exclude=True)] = None
+ model_config = ConfigDict(extra="forbid")

def model_post_init(self, __context):
"""
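`run_compressed` is accepted only so that older configs (and the linked transformers change) keep loading; `Field(exclude=True)` keeps it out of serialized output, so it never round-trips into saved configs. A rough sketch of the intended behavior, assuming the package with this change is installed:

```python
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
)

config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"], weights=QuantizationArgs(num_bits=8)
        )
    },
    run_compressed=True,  # tolerated for backwards compatibility, otherwise unused
)

# Excluded from dumps, so it does not reappear in serialized configs
assert "run_compressed" not in config.model_dump()
```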
5 changes: 3 additions & 2 deletions src/compressed_tensors/quantization/quant_scheme.py
@@ -13,15 +13,15 @@
# limitations under the License.

from copy import deepcopy
- from typing import Any, Dict, List, Optional
+ from typing import List, Optional

from compressed_tensors.quantization.quant_args import (
DynamicType,
QuantizationArgs,
QuantizationStrategy,
QuantizationType,
)
- from pydantic import BaseModel, model_validator
+ from pydantic import BaseModel, ConfigDict, model_validator


__all__ = [
@@ -47,6 +47,7 @@ class QuantizationScheme(BaseModel):
weights: Optional[QuantizationArgs] = None
input_activations: Optional[QuantizationArgs] = None
output_activations: Optional[QuantizationArgs] = None
+ model_config = ConfigDict(extra="forbid")

@model_validator(mode="after")
def validate_model_after(model: "QuantizationScheme") -> "QuantizationScheme":
3 changes: 2 additions & 1 deletion src/compressed_tensors/transform/transform_args.py
@@ -15,7 +15,7 @@
from enum import Enum
from typing import List

- from pydantic import BaseModel, Field, field_validator
+ from pydantic import BaseModel, ConfigDict, Field, field_validator


__all__ = ["TransformArgs", "TransformLocation"]
@@ -61,6 +61,7 @@ class TransformArgs(BaseModel):
location: TransformLocation
inverse: bool = Field(default=False)
ignore: List[str] = Field(default_factory=list)
+ model_config = ConfigDict(extra="forbid")

@field_validator("targets", "ignore", mode="before")
@classmethod
3 changes: 2 additions & 1 deletion src/compressed_tensors/transform/transform_config.py
@@ -15,7 +15,7 @@
from typing import Dict

from compressed_tensors.transform import TransformArgs, TransformScheme
- from pydantic import BaseModel
+ from pydantic import BaseModel, ConfigDict


__all__ = ["TransformConfig"]
@@ -31,6 +31,7 @@ class TransformConfig(BaseModel):
"""

config_groups: Dict[str, TransformScheme]
+ model_config = ConfigDict(extra="forbid")


# quip / quip sharp
3 changes: 2 additions & 1 deletion src/compressed_tensors/transform/transform_scheme.py
@@ -15,7 +15,7 @@
from typing import List

from compressed_tensors.transform import TransformArgs
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, ConfigDict, Field


__all__ = ["TransformScheme"]
@@ -40,3 +40,4 @@ class TransformScheme(BaseModel):
apply: List[TransformArgs] = Field(default_factory=list)
randomize: bool = Field(default=False)
requires_grad: bool = Field(default=False)
+ model_config = ConfigDict(extra="forbid")
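This is the validation that motivates the test fixes below: keyword typos such as `randomzied=` in `tests/test_transform/factory/test_memory.py` used to be swallowed as extra fields, whereas `extra="forbid"` now rejects them at construction time. A minimal sketch, assuming the package with this change is installed:

```python
from pydantic import ValidationError

from compressed_tensors.transform import TransformScheme

# Unknown keyword arguments now raise instead of being silently dropped
try:
    TransformScheme(type="hadamard", randomzied=True)  # typo for `randomize`
except ValidationError as err:
    print(err)  # reports `randomzied` as an unexpected extra input
```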
@@ -446,10 +446,7 @@ def test_compress_model_meta(model_stub, q_format, s_config):
cpu_model, s_config, q_format
)
# Only stores dtype because meta model does not store values
- expected = {
-     k: v.dtype
-     for k, v in reference_compressor.compress(cpu_model).items()
- }
+ expected = {k: v.dtype for k, v in reference_compressor.compress(cpu_model).items()}

# Load model on meta device
meta_model = AutoModelForCausalLM.from_pretrained(
18 changes: 9 additions & 9 deletions tests/test_transform/factory/test_correctness.py
@@ -26,11 +26,11 @@


@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_correctness_linear(type, randomized):
@pytest.mark.parametrize("randomize", (True, False))
def test_correctness_linear(type, randomize):
size = (4, 8)
module = torch.nn.Linear(*size, bias=True)
- scheme = TransformScheme(type=type, randomized=randomized)
+ scheme = TransformScheme(type=type, randomize=randomize)
factory = TransformFactory.from_scheme(scheme, name="")

input_tfm = factory.create_transform(
@@ -55,8 +55,8 @@ def test_correctness_linear(type, randomized):


@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_correctness_model(type, randomized, model_apply, offload=False):
@pytest.mark.parametrize("randomize", (True, False))
def test_correctness_model(type, randomize, model_apply, offload=False):
# load model
model = model_apply[0]
if offload:
@@ -71,7 +71,7 @@ def test_correctness_model(type, randomized, model_apply, offload=False):
# apply transforms
config = TransformConfig(
config_groups={
"": TransformScheme(type=type, randomized=randomized, apply=model_apply[1])
"": TransformScheme(type=type, randomize=randomize, apply=model_apply[1])
}
)
apply_transform_config(model, config)
@@ -84,6 +84,6 @@ def test_correctness_model(type, randomized, model_apply, offload=False):
@requires_gpu
@requires_accelerate()
@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_correctness_model_offload(type, randomized, model_apply):
test_correctness_model(type, randomized, model_apply, offload=True)
@pytest.mark.parametrize("randomize", (True, False))
def test_correctness_model_offload(type, randomize, model_apply):
test_correctness_model(type, randomize, model_apply, offload=True)
15 changes: 6 additions & 9 deletions tests/test_transform/factory/test_memory.py
@@ -29,9 +29,9 @@


@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
@pytest.mark.parametrize("randomize", (True, False))
@pytest.mark.parametrize("requires_grad", (True, False))
- def test_memory_sharing(type, randomized, requires_grad, offload=False):
+ def test_memory_sharing(type, randomize, requires_grad, offload=False):
# load model (maybe with offloading)
model = TransformableModel(2, 2, 4, 4, 8, 8)
if offload:
@@ -42,7 +42,7 @@ def test_memory_sharing(type, randomized, requires_grad, offload=False):
config_groups={
"": TransformScheme(
type=type,
- randomzied=randomized,
+ randomize=randomize,
requires_grad=requires_grad,
apply=[
TransformArgs(targets="Linear", location="input"),
@@ -84,9 +84,6 @@ def test_memory_sharing(type, randomized, requires_grad, offload=False):
@requires_gpu
@requires_accelerate()
@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_memory_sharing_offload(
type,
randomized,
):
test_memory_sharing(type, randomized, requires_grad=False, offload=True)
@pytest.mark.parametrize("randomize", (True, False))
def test_memory_sharing_offload(type, randomize):
test_memory_sharing(type, randomize, requires_grad=False, offload=True)