Commit 93948a4 (parent: d7dbb84)

Commit message: up
File tree: 5 files changed (+37, -72 lines)

test/quantization/quantize_/workflows/intx/test_intx_unpacked_tensor.py (12 additions, 1 deletion)

@@ -29,9 +29,20 @@ def setUp(self):
         self.config = IntxWeightOnlyConfig(
             weight_dtype=torch.int4,
             granularity=PerGroup(32),
-            VERSION=2,
+            version=2,
         )
 
+    def test_embedding(self):
+        dtype = torch.bfloat16
+        device = "cpu"
+        input = torch.randint(low=0, high=128, size=(10,), device=device)
+        embedding = torch.nn.Embedding(128, 256, dtype=dtype, device=device)
+        original = embedding(input)
+        quantize_(embedding, self.config)
+        quantized = embedding(input)
+        error = compute_error(original, quantized)
+        self.assertTrue(error > 20)
+
     def test_linear(self):
         dtype = torch.bfloat16
         device = "cpu"

torchao/experimental/tests/test_embedding_xbit_quantizer.py (1 addition, 1 deletion)

@@ -185,7 +185,7 @@ def test_shared_embedding(self):
         # Check the shared_embedding and linear ops use the same lifted weight
         expected_lines = [
             "torch.ops.torchao._shared_embedding_4bit.default",
-            "torch.ops.torchao._linear_8bit_act_4bit_weight.defaul",
+            "torch.ops.torchao._linear_8bit_act_4bit_weight.default",
         ]
         for line in expected_lines:
             FileCheck().check_count(line, 1, exactly=True).run(

torchao/quantization/quant_api.py (15 additions, 6 deletions)

@@ -564,6 +564,10 @@ def _linear_extra_repr(self):
     return f"in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}, weight={_quantization_type(self.weight)}"
 
 
+def _embedding_extra_repr(self):
+    return f"num_embeddings={self.weight.shape[0]}, embedding_dim={self.weight.shape[1]}, weight={_quantization_type(self.weight)}"
+
+
 def _get_linear_subclass_inserter(
     constructor, *, allow_requires_grad=False, propagate_bias=False, **kwargs
 ):

@@ -2061,8 +2065,8 @@ class IntxWeightOnlyConfig(AOBaseConfig):
     mapping_type: MappingType = MappingType.SYMMETRIC
     scale_dtype: Optional[torch.dtype] = None
     layout: Layout = QDQLayout()
-    packing_format: PackingFormat = PackingFormat.UNPACKED
-    VERSION: int = 1
+    packing_format: PackingFormat = PackingFormat.UNPACKED_TO_INT8
+    version: int = 1
 
     def __post_init__(self):
         assert TORCH_VERSION_AT_LEAST_2_6, "IntxWeightOnlyConfig requires torch 2.6+"

@@ -2104,9 +2108,9 @@ def _intx_weight_only_quantize_tensor(weight, config):
 
     block_size = (1, group_size)
 
-    if config.VERSION == 2:
-        if config.packing_format == PackingFormat.UNPACKED:
-            new_weight = IntxUnpackedTensor.from_float(
+    if config.version == 2:
+        if config.packing_format == PackingFormat.UNPACKED_TO_INT8:
+            new_weight = IntxUnpackedTensor.from_hp(
                 weight,
                 block_size,
                 weight_dtype,

@@ -2146,7 +2150,12 @@ def _intx_weight_only_transform(
     )
     new_weight = _intx_weight_only_quantize_tensor(module.weight, config)
     module.weight = torch.nn.Parameter(new_weight, requires_grad=False)
-    module.extra_repr = types.MethodType(_linear_extra_repr, module)
+
+    if isinstance(module, nn.Linear):
+        module.extra_repr = types.MethodType(_linear_extra_repr, module)
+    elif isinstance(module, nn.Embedding):
+        module.extra_repr = types.MethodType(_embedding_extra_repr, module)
+
     return module
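Since _intx_weight_only_transform now dispatches extra_repr on the module type, a quantized model prints embedding metadata too. A sketch of applying the config across a whole model (assuming quantize_'s filter_fn parameter, whose default targets nn.Linear, so embeddings need an explicit filter):

import torch
from torchao.quantization import IntxWeightOnlyConfig, quantize_
from torchao.quantization.granularity import PerGroup

model = torch.nn.Sequential(
    torch.nn.Embedding(128, 64),
    torch.nn.Linear(64, 64),
)
config = IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32), version=2)

def is_linear_or_embedding(module, fqn):
    return isinstance(module, (torch.nn.Linear, torch.nn.Embedding))

quantize_(model, config, filter_fn=is_linear_or_embedding)
print(model)  # Linear/Embedding reprs now report weight=<quantization type>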

torchao/quantization/quantize_/common/packing_format.py (1 addition, 1 deletion)

@@ -34,4 +34,4 @@ class PackingFormat(str, Enum):
     """
     Unpacked means the subbyte quantized data is stored as int8
     """
-    UNPACKED = "unpacked"
+    UNPACKED_TO_INT8 = "unpacked_to_int8"
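Because PackingFormat subclasses str, renaming the member also changes the serialized value: the old "unpacked" string no longer parses. A quick behavioral sketch (module path taken from this file's location):

from torchao.quantization.quantize_.common.packing_format import PackingFormat

assert PackingFormat.UNPACKED_TO_INT8 == "unpacked_to_int8"  # str-enum equality
assert PackingFormat("unpacked_to_int8") is PackingFormat.UNPACKED_TO_INT8
# PackingFormat("unpacked") now raises ValueError: 'unpacked' is not a valid PackingFormat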

torchao/quantization/quantize_/workflows/intx/intx_unpacked_tensor.py (8 additions, 63 deletions)

@@ -55,8 +55,8 @@ class IntxUnpackedTensor(TorchAOBaseTensor):
     block_size: the block size for quantization, representing the granularity, for example groupwise quantization will have block_size (1, group_size)
     """
 
-    tensor_data_attrs = ["int_data", "scale", "zero_point"]
-    tensor_attributes = ["bit_width", "block_size"]
+    tensor_data_names = ["int_data", "scale", "zero_point"]
+    tensor_attribute_names = ["bit_width", "block_size"]
 
     def __new__(cls, int_data, scale, zero_point, bit_width, block_size=None):
         kwargs = {}

@@ -105,30 +105,10 @@ def __init__(
         self.bit_width = bit_width
         self.block_size = block_size
 
-    def __tensor_flatten__(self):
-        return self.tensor_data_attrs, [
-            getattr(self, attr) for attr in self.tensor_attributes
-        ]
-
-    @classmethod
-    def __tensor_unflatten__(
-        cls, tensor_data_dict, tensor_attributes, outer_size, outer_stride
-    ):
-        return cls(
-            *[tensor_data_dict[name] for name in cls.tensor_data_attrs],
-            *tensor_attributes,
-        )
-
-    def _apply_fn_to_data(self, fn):
-        return self.__class__(
-            *[fn(getattr(self, attr)) for attr in self.tensor_data_attrs],
-            *[getattr(self, attr) for attr in self.tensor_attributes],
-        )
-
     def __repr__(self):
         repr_fields = (
-            self.tensor_data_attrs
-            + self.tensor_attributes
+            self.tensor_data_names
+            + self.tensor_attribute_names
             + ["shape", "device", "dtype", "require_grad"]
         )
         inner_repr = [f"{attr}={getattr(self, attr)}" for attr in repr_fields]

@@ -157,14 +137,17 @@ def to(self, *args, **kwargs):
         )
 
     @classmethod
-    def from_float(
+    def from_hp(
         cls,
         float_tensor: torch.Tensor,
         block_size: Tuple[int],
         dtype: torch.dtype,
         *,
         mapping_type: MappingType = MappingType.SYMMETRIC,
     ):
+        """
+        Create an IntxUnpackedTensor from a high-precision tensor
+        """
        qmin, qmax = _DTYPE_TO_QVALUE_BOUNDS[dtype]
        bit_width = _DTYPE_TO_BIT_WIDTH[dtype]
        scale, zero_point = choose_qparams_affine(
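The rename from from_float to from_hp ("high precision", per the new docstring) matches the constructor naming used elsewhere in torchao's quantize_ workflows. A direct-construction sketch (import path inferred from this file's location):

import torch
from torchao.quantization.quantize_.workflows.intx.intx_unpacked_tensor import (
    IntxUnpackedTensor,
)

weight = torch.randn(128, 256, dtype=torch.bfloat16)
qweight = IntxUnpackedTensor.from_hp(
    weight,
    (1, 32),     # groupwise along the last dim, group size 32
    torch.int4,  # subbyte values stored unpacked, one int8 per int4 value
)
# qweight.int_data / qweight.scale / qweight.zero_point hold the quantized pieces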
@@ -234,44 +217,6 @@ def _(func, types, args, kwargs):
     return torch.nn.functional.embedding(indices, weight_tensor, **kwargs)
 
 
-@implements([aten.detach.default, aten.alias.default])
-def _(func, types, args, kwargs):
-    return return_and_correct_aliasing(
-        func, args, kwargs, args[0]._apply_fn_to_data(torch.detach)
-    )
-
-
-@implements(aten.clone.default)
-def _(func, types, args, kwargs):
-    return return_and_correct_aliasing(
-        func, args, kwargs, args[0]._apply_fn_to_data(torch.clone)
-    )
-
-
-def _same_metadata(self: "IntxUnpackedTensor", src: "IntxUnpackedTensor") -> bool:
-    return (
-        isinstance(self, IntxUnpackedTensor)
-        and isinstance(src, IntxUnpackedTensor)
-        and all(
-            getattr(self, attr) == getattr(src, attr) for attr in self.tensor_attributes
-        )
-    )
-
-
-@implements(aten.copy_.default)
-def _(func, types, args, kwargs):
-    self = args[0]
-    src = args[1]
-    if _same_metadata(self, src):
-        self_tensors = self.__tensor_flatten__()[0]
-        for tensor_name in self_tensors:
-            getattr(self, tensor_name).copy_(getattr(src, tensor_name))
-        return
-    raise ValueError(
-        f"Not supported args for copy_ due to metadata mismatch: {args[0], args[1]}"
-    )
-
-
 @implements(aten.slice.Tensor)
 def _(func, types, args, kwargs):
     self, dim, start, end, step = fill_defaults(args, 5, [0, None, None, 1])
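The bulk of the deletions is boilerplate that TorchAOBaseTensor can now derive from the declared tensor_data_names and tensor_attribute_names: __tensor_flatten__/__tensor_unflatten__, _apply_fn_to_data, and the generic detach/clone/copy_ op handlers. A simplified sketch of the declarative pattern (not torchao's actual base-class code):

import torch

class DeclarativeTensorBase(torch.Tensor):
    # Subclasses declare which attributes hold tensors vs. plain metadata.
    tensor_data_names: list = []
    tensor_attribute_names: list = []

    def __tensor_flatten__(self):
        # Flattening is derived entirely from the declared names.
        return self.tensor_data_names, [
            getattr(self, name) for name in self.tensor_attribute_names
        ]

    @classmethod
    def __tensor_unflatten__(cls, tensor_data_dict, tensor_attributes, outer_size, outer_stride):
        return cls(
            *[tensor_data_dict[name] for name in cls.tensor_data_names],
            *tensor_attributes,
        )

    def _apply_fn_to_data(self, fn):
        # Generic helper behind detach/clone: rebuild with fn mapped over tensor data.
        return self.__class__(
            *[fn(getattr(self, name)) for name in self.tensor_data_names],
            *[getattr(self, name) for name in self.tensor_attribute_names],
        )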
