     GroupwiseLutWeightConfig,
 )
 from torchao.quantization.quant_api import quantize_
-from torchao.quantization.granularity import PerGroup
-
+from torchao.prototype.quantization.codebook_utils.codebook_utils import (
+    group_size_to_block_shapes
+)
 
 class TestGroupwiseLowbitWeightLut(unittest.TestCase):
     """
@@ -27,30 +28,29 @@ class TestGroupwiseLowbitWeightLut(unittest.TestCase):
 
     TEST_CASES = [
         param(
-            weight_dtype=weight_dtype,
+            code_dtype=code_dtype,
             lut_group_size=lut_group_size,
             scale_group_size=scale_group_size,
-            model_dtype=model_dtype,
+            weight_dtype=weight_dtype,
             has_bias=has_bias,
             has_scales=has_scales,
         )
-        for weight_dtype in [uint1, uint2, uint3, uint4]
+        for code_dtype in [uint1, uint2, uint3, uint4]
         for lut_group_size, scale_group_size in [(256, 64), (256, 32)]
-        for model_dtype in [torch.float32]
+        for weight_dtype in [torch.float32]
         for has_bias in [True, False]
         for has_scales in [True, False]
     ]
-
     # --------------------------------------------------------------------------
     # Test 1: End-to-End Model Accuracy
     # --------------------------------------------------------------------------
     @parameterized.expand(TEST_CASES)
     def test_e2e_accuracy_vs_reference(
         self,
-        weight_dtype,
+        code_dtype,
         lut_group_size,
         scale_group_size,
-        model_dtype,
+        weight_dtype,
         has_bias,
         has_scales,
     ):
@@ -59,19 +59,20 @@ def test_e2e_accuracy_vs_reference(
         This now uses the `use_qdq_reference` flag instead of layout objects.
         """
         m, k, n = 3, 64, 32
-        activations = torch.randn(m, k, dtype=model_dtype)
-        model = nn.Sequential(nn.Linear(k, n, bias=has_bias)).to(dtype=model_dtype)
+        activations = torch.randn(m, k, dtype=weight_dtype)
+        model = nn.Sequential(nn.Linear(k, n, bias=has_bias)).to(dtype=weight_dtype)
 
-        lut_granularity = PerGroup(lut_group_size)
-        scale_granularity = PerGroup(scale_group_size) if has_scales else None
+        lut_block_shape, scale_block_shape = group_size_to_block_shapes(lut_group_size=lut_group_size, tensor_shape=(n, k), scale_group_size=scale_group_size if has_scales else None)
 
         # --- Quantize using C++ ops ---
         quantized_model = copy.deepcopy(model)
         perf_config = GroupwiseLutWeightConfig(
+            code_dtype=code_dtype,
             weight_dtype=weight_dtype,
-            lut_granularity=lut_granularity,
-            scale_granularity=scale_granularity,
-            use_qdq_reference=False,  # This creates the custom tensor
+            lut_block_shape=lut_block_shape,
+            scale_block_shape=scale_block_shape,
+            use_qdq_reference=False,
+            has_scale=has_scales,
         )
         quantize_(quantized_model, perf_config)
         with torch.no_grad():
@@ -80,10 +81,12 @@ def test_e2e_accuracy_vs_reference(
         # --- Quantize for Reference (using Python ops) ---
         reference_model = copy.deepcopy(model)
         ref_config = GroupwiseLutWeightConfig(
+            code_dtype=code_dtype,
             weight_dtype=weight_dtype,
-            lut_granularity=lut_granularity,
-            scale_granularity=scale_granularity,
+            lut_block_shape=lut_block_shape,
+            scale_block_shape=scale_block_shape,
             use_qdq_reference=True,
+            has_scale=has_scales,
         )
         quantize_(reference_model, ref_config)
         with torch.no_grad():
@@ -107,28 +110,31 @@ def tearDown(self):
     @parameterized.expand(TEST_CASES)
     def test_export_compile_aoti(
         self,
-        weight_dtype,
+        code_dtype,
         lut_group_size,
         scale_group_size,
-        model_dtype,
+        weight_dtype,
         has_bias,
         has_scales,
     ):
         """
         Tests that the quantized model can be exported and compiled.
         """
         k, n = 64, 32
-        activations = torch.randn(2, k, dtype=model_dtype)
+        activations = torch.randn(2, k, dtype=weight_dtype)
         model = (
-            nn.Sequential(nn.Linear(k, n, bias=has_bias)).to(dtype=model_dtype).eval()
+            nn.Sequential(nn.Linear(k, n, bias=has_bias)).to(dtype=weight_dtype).eval()
        )
+        lut_block_shape, scale_block_shape = group_size_to_block_shapes(lut_group_size=lut_group_size, tensor_shape=(n, k), scale_group_size=scale_group_size if has_scales else None)
 
         # Configure the quantization using the new API
         config = GroupwiseLutWeightConfig(
+            code_dtype=code_dtype,
             weight_dtype=weight_dtype,
-            lut_granularity=PerGroup(lut_group_size),
-            scale_granularity=PerGroup(scale_group_size) if has_scales else None,
-            use_qdq_reference=False,  # Ensure we are testing the custom tensor
+            lut_block_shape=lut_block_shape,
+            scale_block_shape=scale_block_shape,
+            use_qdq_reference=False,
+            has_scale=has_scales,
         )
         quantize_(model, config)
 
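For reviewers unfamiliar with the new API: the diff replaces `PerGroup(...)` granularity objects with explicit block shapes derived from the group sizes and the weight's shape via `group_size_to_block_shapes`. The sketch below shows one plausible reading of that mapping; the `_sketch` suffix, the whole-row/sub-row rules, and the asserts are my assumptions for illustration, not the torchao implementation (which lives in `torchao/prototype/quantization/codebook_utils/codebook_utils.py` and may differ).

```python
def group_size_to_block_shapes_sketch(lut_group_size, tensor_shape, scale_group_size=None):
    # Hypothetical re-implementation for illustration only.
    n, k = tensor_shape  # (out_features, in_features) of the Linear weight

    def to_block_shape(group_size):
        # A "group" is group_size consecutive weights taken row-major.
        # A group covering whole rows becomes a (rows, k) block; a group
        # smaller than one row becomes a (1, group_size) block.
        if group_size >= k:
            assert group_size % k == 0, "group must cover whole rows"
            return (group_size // k, k)
        assert k % group_size == 0, "group must evenly divide a row"
        return (1, group_size)

    lut_block = to_block_shape(lut_group_size)
    scale_block = to_block_shape(scale_group_size) if scale_group_size else None
    return lut_block, scale_block


# With the shapes used in these tests (n=32, k=64):
#   lut_group_size=256  -> (4, 64): one LUT shared by four output rows
#   scale_group_size=64 -> (1, 64): one scale per output row
print(group_size_to_block_shapes_sketch(256, (32, 64), scale_group_size=64))
# ((4, 64), (1, 64))
```

Presumably the motivation for the change: an explicit 2D block shape states exactly how a group tiles the weight matrix (including groups that span several rows), which a scalar `PerGroup` granularity left implicit.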