16 | 16 |
17 | 17 | from executorch.extension.llm.export.builder import DType |
18 | 18 |
19 | | -from sentencepiece import SentencePieceProcessor |
20 | | - |
21 | 19 | from torchao.dtypes import PackedLinearInt8DynamicActivationIntxWeightLayout |
22 | 20 | from torchao.quantization.granularity import PerAxis, PerGroup |
23 | 21 | from torchao.quantization.quant_api import ( |
@@ -516,56 +514,37 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: |
516 | 514 | ############################ Source Transform Start ####################### |
517 | 515 |
518 | 516 |
519 | | -def get_quant_embedding_transform(args, dtype_override: Optional[DType] = None): |
| 517 | +def get_quant_embedding_transform( |
| 518 | + args, use_shared_embedding: bool = False, dtype_override: Optional[DType] = None |
| 519 | +): |
520 | 520 | use_torchao = args.embedding_quantize.startswith("torchao:") |
521 | 521 | if use_torchao: |
522 | 522 | quant_args = args.embedding_quantize.split(":")[1].split(",") |
523 | 523 | else: |
524 | 524 | quant_args = args.embedding_quantize.split(",") |
| 525 | + assert len(quant_args) in [ |
| 526 | + 2, |
| 527 | + 3, |
| 528 | + ], f"Expected 2 or 3 embedding quant_args, but got: {quant_args}" |
525 | 529 |
526 | 530 | bitwidth = int(quant_args[0]) |
527 | 531 | group_size = quant_args[1] |
528 | 532 | if group_size in ["none", "None", "0"]: |
529 | 533 | group_size = 0 |
530 | 534 | group_size = int(group_size) |
531 | | - is_symmetric = bool(quant_args[3]) if len(quant_args) > 2 else True |
| 535 | + is_symmetric = ( |
| 536 | + bool(quant_args[2].lower() == "true") if len(quant_args) > 2 else True |
| 537 | + ) |
532 | 538 |
533 | 539 | weight_dtype = getattr(torch, f"int{bitwidth}") |
534 | 540 | granularity = PerAxis(0) if group_size == 0 else PerGroup(group_size) |
535 | 541 | mapping_type = MappingType.SYMMETRIC if is_symmetric else MappingType.ASYMMETRIC |
536 | 542 |
537 | 543 | if use_torchao: |
538 | | -def get_quant_embedding_transform( |
539 | | - embedding_quantize: str, |
540 | | - use_shared_embedding: bool = False, |
541 | | - dtype_override: Optional[DType] = None, |
542 | | -): |
543 | | - if embedding_quantize.startswith("torchao:"): |
544 | 544 | from torchao.experimental.quant_api import ( |
545 | 545 | EmbeddingQuantizer, |
546 | 546 | SharedEmbeddingQuantizer, |
547 | 547 | ) |
548 | | - from torchao.quantization.granularity import PerAxis, PerGroup |
549 | | - from torchao.quantization.quant_api import MappingType |
550 | | - |
551 | | - quant_args = embedding_quantize.split(":")[1].split(",") |
552 | | - if len(quant_args) == 2: |
553 | | - bitwidth, group_size = quant_args |
554 | | - is_asymmetric = True |
555 | | - else: |
556 | | - bitwidth, group_size, is_asymmetric = quant_args |
557 | | - |
558 | | - if group_size in ["none", "None", "0"]: |
559 | | - group_size = 0 |
560 | | - |
561 | | - group_size = int(group_size) |
562 | | - bitwidth = int(bitwidth) |
563 | | - is_asymmetric = bool(is_asymmetric) |
564 | | - weight_dtype = getattr(torch, f"int{bitwidth}") |
565 | | - granularity = PerAxis(0) if group_size == 0 else PerGroup(group_size) |
566 | | - mapping_type = ( |
567 | | - MappingType.ASYMMETRIC if is_asymmetric else MappingType.SYMMETRIC |
568 | | - ) |
569 | 548 |
570 | 549 | def _torchao_embedding_quantizer(model): |
571 | 550 | with torch.no_grad(): |
@@ -599,6 +578,7 @@ def _quantize_embedding(model): |
599 | 578 | granularity=granularity, |
600 | 579 | mapping_type=mapping_type, |
601 | 580 | ), |
| 581 | + lambda m, fqn: isinstance(m, nn.Embedding), |
602 | 582 | ) |
603 | 583 | return model |
604 | 584 |
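For context on how the refactored transform would be driven, here is a minimal usage sketch. It assumes `args` is the usual argparse-style namespace from the llama export flow, that `embedding_quantize` takes the form `"bitwidth,group_size[,is_symmetric]"` (optionally prefixed with `"torchao:"`), and that `get_quant_embedding_transform` returns a callable that quantizes a model's `nn.Embedding` modules and hands the model back, as the inner `_torchao_embedding_quantizer` / `_quantize_embedding` helpers above suggest. The toy model and argument values are invented for illustration.

```python
# Illustrative sketch only; the toy model and argument strings are assumptions.
from argparse import Namespace

import torch
import torch.nn as nn


class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.tok_embeddings = nn.Embedding(4096, 64)

    def forward(self, tokens: torch.Tensor) -> torch.Tensor:
        return self.tok_embeddings(tokens)


# Non-torchao path: "bitwidth,group_size" -> int4 weights, groups of 32.
# The quantization is restricted to nn.Embedding modules via the
# filter function added in the second hunk above.
args = Namespace(embedding_quantize="4,32")
transform = get_quant_embedding_transform(args)
model = transform(ToyModel())  # assumes the callable mutates the model and returns it

# torchao path: same layout after the "torchao:" prefix; the optional third
# field selects symmetric ("true") vs. asymmetric quantization.
args = Namespace(embedding_quantize="torchao:4,32,true")
transform = get_quant_embedding_transform(args, use_shared_embedding=False)
model = transform(ToyModel())
```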