Error when configs are created with unrecognized fields #386

Merged: 7 commits, Aug 11, 2025. Changes shown from 4 commits.
@@ -393,7 +393,7 @@ def compress_model(self, model: Module):

if prefix in module_to_scheme or prefix in sparse_compression_targets:
module_device = get_execution_device(module).type
- is_meta = (module_device == "meta")
+ is_meta = module_device == "meta"

exec_device = "meta" if is_meta else "cpu"
onloading_device = "meta" if is_meta else module_device
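For context on the device handling above: modules already on the meta device stay on `meta`, while real modules are compressed on CPU and then onloaded back to their original device. A minimal sketch of that selection logic, using `next(module.parameters()).device` as a stand-in for the library's `get_execution_device` helper:

```python
import torch

# Hypothetical module materialized on the meta device (no real storage)
module = torch.nn.Linear(8, 8, device="meta")

# Stand-in for get_execution_device(module).type
module_device = next(module.parameters()).device.type
is_meta = module_device == "meta"

# Meta modules are handled on "meta"; everything else is compressed on CPU
# and onloaded back to its original device afterwards
exec_device = "meta" if is_meta else "cpu"
onloading_device = "meta" if is_meta else module_device
print(exec_device, onloading_device)  # meta meta
```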
@@ -178,9 +178,13 @@ def sparse24_bitmask_compress(

if tensor.is_meta:
num_rows, num_cols = tensor.shape
- compressed_values = torch.empty((num_rows, num_cols // 2), dtype=tensor.dtype, device="meta")
+ compressed_values = torch.empty(
+     (num_rows, num_cols // 2), dtype=tensor.dtype, device="meta"
+ )
packed_cols = (num_cols + 7) // 8
- bitmasks_packed = torch.empty((num_rows, packed_cols), dtype=torch.uint8, device="meta")
+ bitmasks_packed = torch.empty(
+     (num_rows, packed_cols), dtype=torch.uint8, device="meta"
+ )
return compressed_values, bitmasks_packed

bytemasks = get_24_bytemasks(tensor=tensor)
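As a rough illustration of the shape math in this hunk (sizes are arbitrary, not from the PR): 2:4 sparsity keeps half of the values in each row, and the bitmask packs eight column bits per byte, so on the meta device only shapes and dtypes need to be carried:

```python
import torch

num_rows, num_cols = 16, 64  # example dimensions

# Half the columns survive 2:4 pruning
compressed_values = torch.empty(
    (num_rows, num_cols // 2), dtype=torch.bfloat16, device="meta"
)

# One bit per column, packed 8 columns per uint8 byte (rounded up)
packed_cols = (num_cols + 7) // 8
bitmasks_packed = torch.empty(
    (num_rows, packed_cols), dtype=torch.uint8, device="meta"
)

print(compressed_values.shape, bitmasks_packed.shape)
# torch.Size([16, 32]) torch.Size([16, 8])
```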
7 changes: 6 additions & 1 deletion src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -189,7 +189,12 @@ def _initialize_scale_zero_point(
else:
# TODO: consider erroring out in the future as if the dtype if not one of these,
# there is likely bug
- if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32, torch.float64]:
+ if scale_dtype not in [
+     torch.float16,
+     torch.bfloat16,
+     torch.float32,
+     torch.float64,
+ ]:
scale_dtype = torch.float16
zp_dtype = quantization_args.pytorch_dtype()

3 changes: 2 additions & 1 deletion src/compressed_tensors/quantization/quant_args.py
@@ -19,7 +19,7 @@
import torch
from compressed_tensors.utils import Aliasable
from compressed_tensors.utils.helpers import deprecated
- from pydantic import BaseModel, Field, field_validator, model_validator
+ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator


__all__ = [
@@ -186,6 +186,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
"Observers constructor excluding quantization range or symmetry"
),
)
+ model_config = ConfigDict(extra="forbid")

@field_validator("type", mode="before")
def validate_type(cls, value) -> QuantizationType:
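With `extra="forbid"` set on `QuantizationArgs`, an unrecognized field now raises a `ValidationError` instead of being silently ignored. A minimal sketch, assuming `compressed-tensors` with this change is installed:

```python
from pydantic import ValidationError

from compressed_tensors.quantization import QuantizationArgs

# A misspelled field used to be dropped silently; now it fails loudly
try:
    QuantizationArgs(num_bits=8, symetric=True)  # typo: should be `symmetric`
except ValidationError as err:
    print(err)  # pydantic reports `symetric` as an extra input that is not permitted
```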
8 changes: 6 additions & 2 deletions src/compressed_tensors/quantization/quant_config.py
@@ -13,7 +13,7 @@
# limitations under the License.

from enum import Enum
- from typing import Dict, List, Optional, Union
+ from typing import Annotated, Any, Dict, List, Optional, Union

from compressed_tensors.config import CompressionFormat
from compressed_tensors.quantization.quant_args import DynamicType, QuantizationArgs
@@ -26,7 +26,7 @@
module_type,
parse_out_kv_cache_args,
)
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, ConfigDict, Field
from torch.nn import Module


@@ -142,6 +142,10 @@ class QuantizationConfig(BaseModel):
quantization_status: QuantizationStatus = QuantizationStatus.INITIALIZED
global_compression_ratio: Optional[float] = None
ignore: Optional[List[str]] = Field(default_factory=list)
+ # `run_compressed` is a dummy, unused arg for backwards compatibility
+ # see: https://github.com/huggingface/transformers/pull/39324
+ run_compressed: Annotated[Any, Field(exclude=True)] = None
+ model_config = ConfigDict(extra="forbid")

def model_post_init(self, __context):
"""
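`run_compressed` is accepted only so that older configs (and the linked transformers change) keep loading; `Field(exclude=True)` keeps it out of serialized output, so it never round-trips into saved configs. A rough sketch of the intended behavior, assuming the package with this change is installed:

```python
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
)

config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"], weights=QuantizationArgs(num_bits=8)
        )
    },
    run_compressed=True,  # tolerated for backwards compatibility, otherwise unused
)

# Excluded from dumps, so it does not reappear in serialized configs
assert "run_compressed" not in config.model_dump()
```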
5 changes: 3 additions & 2 deletions src/compressed_tensors/quantization/quant_scheme.py
@@ -13,15 +13,15 @@
# limitations under the License.

from copy import deepcopy
- from typing import Any, Dict, List, Optional
+ from typing import List, Optional

from compressed_tensors.quantization.quant_args import (
DynamicType,
QuantizationArgs,
QuantizationStrategy,
QuantizationType,
)
- from pydantic import BaseModel, model_validator
+ from pydantic import BaseModel, ConfigDict, model_validator


__all__ = [
@@ -47,6 +47,7 @@ class QuantizationScheme(BaseModel):
weights: Optional[QuantizationArgs] = None
input_activations: Optional[QuantizationArgs] = None
output_activations: Optional[QuantizationArgs] = None
+ model_config = ConfigDict(extra="forbid")

@model_validator(mode="after")
def validate_model_after(model: "QuantizationScheme") -> "QuantizationScheme":
3 changes: 2 additions & 1 deletion src/compressed_tensors/transform/transform_args.py
@@ -15,7 +15,7 @@
from enum import Enum
from typing import List

- from pydantic import BaseModel, Field, field_validator
+ from pydantic import BaseModel, ConfigDict, Field, field_validator


__all__ = ["TransformArgs", "TransformLocation"]
@@ -61,6 +61,7 @@ class TransformArgs(BaseModel):
location: TransformLocation
inverse: bool = Field(default=False)
ignore: List[str] = Field(default_factory=list)
+ model_config = ConfigDict(extra="forbid")

@field_validator("targets", "ignore", mode="before")
@classmethod
3 changes: 2 additions & 1 deletion src/compressed_tensors/transform/transform_config.py
@@ -15,7 +15,7 @@
from typing import Dict

from compressed_tensors.transform import TransformArgs, TransformScheme
- from pydantic import BaseModel
+ from pydantic import BaseModel, ConfigDict


__all__ = ["TransformConfig"]
@@ -31,6 +31,7 @@ class TransformConfig(BaseModel):
"""

config_groups: Dict[str, TransformScheme]
+ model_config = ConfigDict(extra="forbid")


# quip / quip sharp
3 changes: 2 additions & 1 deletion src/compressed_tensors/transform/transform_scheme.py
@@ -15,7 +15,7 @@
from typing import List

from compressed_tensors.transform import TransformArgs
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, ConfigDict, Field


__all__ = ["TransformScheme"]
@@ -40,3 +40,4 @@ class TransformScheme(BaseModel):
apply: List[TransformArgs] = Field(default_factory=list)
randomize: bool = Field(default=False)
requires_grad: bool = Field(default=False)
+ model_config = ConfigDict(extra="forbid")
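This is the validation that motivates the test fixes below: keyword typos such as `randomzied=` in `tests/test_transform/factory/test_memory.py` used to be swallowed as extra fields, whereas `extra="forbid"` now rejects them at construction time. A minimal sketch, assuming the package with this change is installed:

```python
from pydantic import ValidationError

from compressed_tensors.transform import TransformScheme

# Unknown keyword arguments now raise instead of being silently dropped
try:
    TransformScheme(type="hadamard", randomzied=True)  # typo for `randomize`
except ValidationError as err:
    print(err)  # reports `randomzied` as an unexpected extra input
```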
@@ -446,10 +446,7 @@ def test_compress_model_meta(model_stub, q_format, s_config):
cpu_model, s_config, q_format
)
# Only stores dtype because meta model does not store values
- expected = {
-     k: v.dtype
-     for k, v in reference_compressor.compress(cpu_model).items()
- }
+ expected = {k: v.dtype for k, v in reference_compressor.compress(cpu_model).items()}

# Load model on meta device
meta_model = AutoModelForCausalLM.from_pretrained(
18 changes: 9 additions & 9 deletions tests/test_transform/factory/test_correctness.py
@@ -26,11 +26,11 @@


@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_correctness_linear(type, randomized):
@pytest.mark.parametrize("randomize", (True, False))
def test_correctness_linear(type, randomize):
size = (4, 8)
module = torch.nn.Linear(*size, bias=True)
- scheme = TransformScheme(type=type, randomized=randomized)
+ scheme = TransformScheme(type=type, randomize=randomize)
factory = TransformFactory.from_scheme(scheme, name="")

input_tfm = factory.create_transform(
@@ -55,8 +55,8 @@ def test_correctness_linear(type, randomized):


@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_correctness_model(type, randomized, model_apply, offload=False):
@pytest.mark.parametrize("randomize", (True, False))
def test_correctness_model(type, randomize, model_apply, offload=False):
# load model
model = model_apply[0]
if offload:
@@ -71,7 +71,7 @@ def test_correctness_model(type, randomized, model_apply, offload=False):
# apply transforms
config = TransformConfig(
config_groups={
"": TransformScheme(type=type, randomized=randomized, apply=model_apply[1])
"": TransformScheme(type=type, randomize=randomize, apply=model_apply[1])
}
)
apply_transform_config(model, config)
@@ -84,6 +84,6 @@ def test_correctness_model(type, randomized, model_apply, offload=False):
@requires_gpu
@requires_accelerate()
@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_correctness_model_offload(type, randomized, model_apply):
test_correctness_model(type, randomized, model_apply, offload=True)
@pytest.mark.parametrize("randomize", (True, False))
def test_correctness_model_offload(type, randomize, model_apply):
test_correctness_model(type, randomize, model_apply, offload=True)
15 changes: 6 additions & 9 deletions tests/test_transform/factory/test_memory.py
@@ -29,9 +29,9 @@


@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
@pytest.mark.parametrize("randomize", (True, False))
@pytest.mark.parametrize("requires_grad", (True, False))
- def test_memory_sharing(type, randomized, requires_grad, offload=False):
+ def test_memory_sharing(type, randomize, requires_grad, offload=False):
# load model (maybe with offloading)
model = TransformableModel(2, 2, 4, 4, 8, 8)
if offload:
@@ -42,7 +42,7 @@ def test_memory_sharing(type, randomized, requires_grad, offload=False):
config_groups={
"": TransformScheme(
type=type,
- randomzied=randomized,
+ randomize=randomize,
requires_grad=requires_grad,
apply=[
TransformArgs(targets="Linear", location="input"),
@@ -84,9 +84,6 @@ def test_memory_sharing(type, randomized, requires_grad, offload=False):
@requires_gpu
@requires_accelerate()
@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
@pytest.mark.parametrize("randomized", (True, False))
def test_memory_sharing_offload(
type,
randomized,
):
test_memory_sharing(type, randomized, requires_grad=False, offload=True)
@pytest.mark.parametrize("randomize", (True, False))
def test_memory_sharing_offload(type, randomize):
test_memory_sharing(type, randomize, requires_grad=False, offload=True)