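Update: add bazel_skylib repository alias, drop //backends/arm from the buck2 query, use torchao quantize_ for 8da4w quantization, and load the quantized state dict with assign=True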

swolchok · swolchok · commit cece5bce8f8d · 2025-02-25T14:28:29.000-08:00
[ghstack-poisoned]
diff --git a/.buckconfig b/.buckconfig
@@ -11,6 +11,7 @@
   shim_et = shim_et
 
 [repository_aliases]
+  bazel_skylib = shim
   config = prelude
   ovr_config = prelude
   toolchains = shim_et
diff --git a/.ci/scripts/unittest-buck2.sh b/.ci/scripts/unittest-buck2.sh
@@ -8,11 +8,10 @@ set -eux
 
 # TODO: expand this to //...
 # TODO: can't query cadence & vulkan backends
-buck2 query "//backends/apple/... + //backends/arm/... + \
-//backends/example/... + //backends/mediatek/... + //backends/test/... + \
-//backends/transforms/... + //backends/xnnpack/... + //configurations/... + \
-//kernels/portable/cpu/... + //runtime/... + //schema/... + //test/... + \
-//util/..."
+buck2 query "//backends/apple/... + //backends/example/... + \
+//backends/mediatek/... + //backends/test/... + //backends/transforms/... + \
+//backends/xnnpack/... + //configurations/... + //kernels/portable/cpu/... + \
+//runtime/... + //schema/... + //test/... + //util/..."
 
 # TODO: expand the covered scope of Buck targets.
 buck2 build //runtime/core/portable_type/...
diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py
@@ -119,11 +119,10 @@ def quantize(  # noqa C901
         # Check for required args
         if group_size is None:
             raise Exception("For 8da4w quantization, group size must be specified.")
-        from torchao.quantization.quant_api import Int8DynActInt4WeightQuantizer
 
-        model = Int8DynActInt4WeightQuantizer(
-            precision=torch_dtype, groupsize=group_size
-        ).quantize(model)
+        from torchao.quantization import int8_dynamic_activation_int4_weight, quantize_
+
+        quantize_(model, int8_dynamic_activation_int4_weight(group_size=group_size))
 
         if verbose:
             print("quantized model:", model)
@@ -663,7 +662,7 @@ def convert_for_runtime(self) -> nn.Module:
     def quantized_model(self) -> nn.Module:
         model_updated_state_dict = self.create_quantized_state_dict(self.packed)
         self.convert_for_runtime()
-        self.mod.load_state_dict(model_updated_state_dict)
+        self.mod.load_state_dict(model_updated_state_dict, assign=True)
         return self.mod