1212
1313import bitsandbytes as bnb
1414from bitsandbytes .cextension import ROCM_WARP_SIZE_64
15- from bitsandbytes .functional import QuantState , convert_weight_packed_for_cpu , has_avx512bf16
15+ from bitsandbytes .functional import QuantState , _convert_weight_packed_for_cpu , has_avx512bf16
1616from bitsandbytes .optim import GlobalOptimManager
1717from bitsandbytes .utils import INVERSE_LINEAR_8BIT_WEIGHTS_FORMAT_MAPPING , OutlierTracer
1818
@@ -479,7 +479,7 @@ def __init__(
479479 self .compute_type_is_set = compute_dtype is not None
480480 self .quant_state = None
481481 self .quant_storage = quant_storage
482- self .enable_optimized_cpu = False
482+ self .packing_format_for_cpu = False
483483
484484 def set_compute_type (self , x ):
485485 if x .dtype in [torch .float32 , torch .bfloat16 ]:
@@ -513,19 +513,19 @@ def _save_to_state_dict(self, destination, prefix, keep_vars):
513513 destination [prefix + "weight." + k ] = v if keep_vars else v .detach ()
514514
515515 def forward (self , x : torch .Tensor ):
516- quant_state = self .weight .quant_state
517516 fix_4bit_weight_quant_state_from_module (self )
517+ quant_state = self .weight .quant_state
518518
519519 if (
520- not self .enable_optimized_cpu
520+ not self .packing_format_for_cpu
521521 and x .device .type == "cpu"
522522 and has_avx512bf16 ()
523523 and not self .training
524524 and x .requires_grad == False
525525 ):
526- self .weight .data , quant_state = convert_weight_packed_for_cpu (self .weight .data , quant_state )
527- self .enable_optimized_cpu = True
528- quant_state .enable_optimized_cpu = True
526+ self .weight .data , quant_state = _convert_weight_packed_for_cpu (self .weight .data , quant_state )
527+ self .packing_format_for_cpu = True
528+ quant_state .packing_format_for_cpu = True
529529
530530 # weights are cast automatically as Int8Params, but the bias has to be cast manually
531531 if self .bias is not None and self .bias .dtype != x .dtype :
@@ -540,7 +540,7 @@ def forward(self, x: torch.Tensor):
540540 x = x .to (self .compute_dtype )
541541
542542 bias = None if self .bias is None else self .bias .to (self .compute_dtype )
543- weight = self .weight if getattr (quant_state , "enable_optimized_cpu" , False ) else self .weight .t ()
543+ weight = self .weight if getattr (quant_state , "packing_format_for_cpu" , False ) else self .weight .t ()
544544
545545 return bnb .matmul_4bit (x , weight , bias = bias , quant_state = quant_state ).to (inp_dtype )
546546
0 commit comments