
Commit d88601f

Commit message: "up"
1 parent a7cee32 commit d88601f

2 files changed: 8 additions, 8 deletions

exir/passes/_quant_patterns_and_replacements.py

Lines changed: 3 additions & 3 deletions
@@ -39,17 +39,17 @@ def _pack_embedding_weight(weight: Tensor, bitwidth: int) -> Tensor:
         weight_1 = weight_view[:, :, 1] << 2
         weight_2 = weight_view[:, :, 2] << 4
         weight_3 = weight_view[:, :, 3] << 6
-        packed_weight = weight_0 + weight_1 + weight_2 + weight_3
+        packed_weight = weight_0 | weight_1 | weight_2 | weight_3
         return packed_weight
     elif bitwidth == 4:
         assert embedding_dim % 2 == 0, "embedding_dim must be divisible by 2"
         weight_range_shifted = weight.add(8).view(torch.uint8)
         weight_view = weight_range_shifted.view(
             weight.shape[0], weight.shape[1] // 2, 2
         )
-        weight_even = weight_view[:, :, 0] * 16  # left shift 4
+        weight_even = weight_view[:, :, 0] << 4
         weight_odd = weight_view[:, :, 1]
-        packed_weight = weight_even + weight_odd
+        packed_weight = weight_even | weight_odd
         return packed_weight
     elif bitwidth == 8:
         return weight
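This change swaps arithmetic packing for bitwise packing. Because each shifted operand occupies a disjoint bit field of the output byte, no carries can occur, so OR produces exactly the same packed bytes as addition, and the shift form states the intent more directly than `* 16`. A minimal standalone sketch of the equivalence for the 2-bit case (a toy tensor, not the ExecuTorch pass itself):

import torch

# Toy 2-bit packing example: each column holds a value in [0, 3],
# so every shifted operand occupies a disjoint bit pair of the output byte.
weight_view = torch.randint(0, 4, (4, 2, 4), dtype=torch.uint8)

weight_0 = weight_view[:, :, 0]
weight_1 = weight_view[:, :, 1] << 2
weight_2 = weight_view[:, :, 2] << 4
weight_3 = weight_view[:, :, 3] << 6

# With disjoint bit fields there are no carries, so bitwise OR (new form)
# yields exactly the same packed bytes as addition (old form).
packed_or = weight_0 | weight_1 | weight_2 | weight_3
packed_add = weight_0 + weight_1 + weight_2 + weight_3
assert torch.equal(packed_or, packed_add)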

exir/tests/test_quant_fusion_pass.py

Lines changed: 5 additions & 5 deletions
@@ -378,17 +378,17 @@ def forward(self, indices):
         # )

     def test_embedding_torchao(self) -> None:
-        for bit_width, test_dtype_variant, test_per_group in zip(
+        for bit_width, use_dtype_variant, test_per_group in zip(
             [2, 4, 8], [True, False], [True, False]
         ):
-            self._test_embedding_torchao(bit_width, test_dtype_variant, test_per_group)
+            self._test_embedding_torchao(bit_width, use_dtype_variant, test_per_group)

     def _test_embedding_torchao(
-        self, bit_width: int, test_dtype_variant: bool, test_per_group: bool
+        self, bit_width: int, use_dtype_variant: bool, test_per_group: bool
     ) -> None:
         assert bit_width in [2, 4, 8]
         embedding_suffix = f"{bit_width}bit" if bit_width < 8 else "byte"
-        if test_dtype_variant:
+        if use_dtype_variant:
             embedding_suffix = f"{embedding_suffix}_dtype"

         indices = torch.tensor([1, 2, 3], dtype=torch.int64)
@@ -399,7 +399,7 @@ def _test_embedding_torchao(

         # torchao adds a dtype cast to match embeddings original weight type
         # this does not happen for float32 because it is the default dtype
-        model = model.to(torch.float16) if test_dtype_variant else model
+        model = model.to(torch.float16) if use_dtype_variant else model

         # quantize the model
         granularity = PerGroup(32) if test_per_group else PerAxis(0)
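The granularity choice at the end of this hunk controls how many quantization scales the embedding weight gets. As a rough plain-PyTorch illustration of the difference (not torchao's implementation; the tensor shapes and the int8 range are made up for the example), PerAxis(0) amounts to one scale per embedding row, while PerGroup(32) yields one scale per 32-column group within each row:

import torch

weight = torch.randn(16, 64)  # hypothetical embedding table: 16 rows, 64 columns

# PerAxis(0)-style: one symmetric scale per row (axis 0) -> shape (16, 1)
per_axis_scale = weight.abs().amax(dim=1, keepdim=True) / 127.0

# PerGroup(32)-style: one scale per (row, 32-column group) -> shape (16, 2)
group_size = 32
grouped = weight.view(16, 64 // group_size, group_size)
per_group_scale = grouped.abs().amax(dim=-1) / 127.0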
