
Commit 0679683

Authored by Fynn Schmitt-Ulms
Add quality check to CI and fix existing errors (#408)
* Add `python-style` checks to pr ci
* Ignore auto-generated version.py file in copyright check
* Run `make style`
* Fix line lengths
* Remove unused imports
* Fix explicit comparison to True/False
* Fix misc flake8 errors
* Skip auto-generated `version.py` file when running isort
* Move quality check to separate workflow file and fix error msg
* Add 'release/*' branch triggers to ci quality and tests

---------

Signed-off-by: Fynn Schmitt-Ulms <[email protected]>
1 parent d4354b0 commit 0679683
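For context on the style fixes listed above, the snippet below is a hypothetical before/after for the "explicit comparison to True/False" item (the flake8 E712 pattern); it is illustrative only and not taken from this commit's diff.

    flag = True

    # before (flagged by flake8 E712):
    # if flag == True:
    #     do_work()

    # after:
    if flag:
        pass  # do_work()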

File tree

30 files changed: +153 additions, -129 deletions


.github/workflows/quality-check.yaml

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+name: Quality Checks
+on:
+  push:
+    branches:
+      - main
+      - 'release/*'
+  pull_request:
+    branches:
+      - main
+      - 'release/*'
+
+jobs:
+  quality-check:
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true
+      - name: Set Env
+        run: |
+          pip3 install --upgrade pip && pip3 install --upgrade setuptools
+      - name: "⚙️ Install dependencies"
+        run: pip3 install .[dev]
+      - name: "🧹 Running quality checks"
+        run: make quality

.github/workflows/test-check.yaml

Lines changed: 3 additions & 0 deletions
@@ -4,9 +4,11 @@ on:
   push:
     branches:
       - main
+      - 'release/*'
   pull_request:
     branches:
       - main
+      - 'release/*'
 
 jobs:
   python-tests:
@@ -26,3 +28,4 @@ jobs:
         run: pip3 install .[dev,accelerate]
       - name: "🔬 Running tests"
         run: make test
+

setup.cfg

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ ensure_newline_before_comments = True
 force_grid_wrap = 0
 include_trailing_comma = True
 sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
+skip = src/compressed_tensors/version.py
 
 line_length = 88
 lines_after_imports = 2

src/compressed_tensors/compressors/model_compressors/model_compressor.py

Lines changed: 15 additions & 15 deletions
@@ -42,7 +42,6 @@
     apply_quantization_config,
     load_pretrained_quantization_parameters,
 )
-from compressed_tensors.quantization.utils import is_module_quantized
 from compressed_tensors.transform import TransformConfig
 from compressed_tensors.utils import (
     align_module_device,
@@ -309,7 +308,7 @@ def __init__(
         if quantization_config is not None:
             # If a list of compression_format is not provided, we resolve the
             # relevant quantization formats using the config groups from the config
-            # and if those are not defined, we fall-back to the global quantization format
+            # and if those are not defined, we fall-back to the global quantization fmt
             if not self.compression_formats:
                 self.compression_formats = self._fetch_unique_quantization_formats()
 
@@ -661,11 +660,12 @@ def decompress(self, model_path: str, model: Module):
         :param model_path: path to compressed weights
         :param model: pytorch model to load decompressed weights into
 
-        Note: decompress makes use of both _replace_sparsity_weights and _replace_weights
-        The variations in these methods are a result of the subtle variations between the sparsity
-        and quantization compressors. Specifically, quantization compressors return not just the
-        decompressed weight, but the quantization parameters (e.g scales, zero_point) whereas sparsity
-        compressors only return the decompressed weight.
+        Note: decompress makes use of both _replace_sparsity_weights and
+        _replace_weights. The variations in these methods are a result of the subtle
+        variations between the sparsity and quantization compressors. Specifically,
+        quantization compressors return not just the decompressed weight, but the
+        quantization parameters (e.g scales, zero_point) whereas sparsity compressors
+        only return the decompressed weight.
 
         """
         model_path = get_safetensors_folder(model_path)
@@ -707,13 +707,13 @@ def decompress(self, model_path: str, model: Module):
                 model, self.quantization_config
             )
             # Load activation scales/zp or any other quantization parameters
-            # Conditionally load the weight quantization parameters if we have a dense compressor
-            # Or if a sparsity compressor has already been applied
+            # Conditionally load the weight quantization parameters if we have a
+            # dense compressor or if a sparsity compressor has already been applied
             load_pretrained_quantization_parameters(
                 model,
                 model_path,
-                # TODO: all weight quantization params will be moved to the compressor in a follow-up
-                # including initialization
+                # TODO: all weight quantization params will be moved to the
+                # compressor in a follow-up including initialization
                 load_weight_quantization=(
                     sparse_decompressed
                     or isinstance(quant_compressor, DenseCompressor)
@@ -805,7 +805,6 @@ def _replace_sparsity_weights(self, dense_weight_generator, model: Module):
         :param model: The model whose weights are to be updated.
         """
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
-
             split_name = name.split(".")
             prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
             module = operator.attrgetter(prefix)(model)
@@ -841,9 +840,10 @@ def _replace_weights(self, dense_weight_generator, model: Module):
             for param_name, param_data in data.items():
                 if hasattr(module, param_name):
                     # If compressed, will have an incorrect dtype for transformers >4.49
-                    # TODO: we can also just skip initialization of scales/zp if in decompression in init
-                    # to be consistent with loading which happens later as well
-                    # however, update_data does a good shape check - should be moved to the compressor
+                    # TODO: we can also just skip initialization of scales/zp if in
+                    # decompression in init to be consistent with loading which happens
+                    # later as well however, update_data does a good shape check -
+                    # should be moved to the compressor
                     if param_name == "weight":
                         delattr(module, param_name)
                         requires_grad = param_data.dtype in (
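As a rough illustration of the asymmetry described in the decompress docstring above, the sketch below (hypothetical names and shapes, not code from this repository) shows why separate replacement helpers are needed: a quantization compressor yields the weight together with its quantization parameters, while a sparsity compressor yields only the weight.

    import torch


    def quant_decompress_example(name: str):
        # hypothetical quantization decompression: weight plus its quantization params
        yield name, {
            "weight": torch.zeros(4, 4),
            "weight_scale": torch.ones(4, 1),
            "weight_zero_point": torch.zeros(4, 1, dtype=torch.int8),
        }


    def sparse_decompress_example(name: str):
        # hypothetical sparsity decompression: the dense weight only
        yield name, {"weight": torch.zeros(4, 4)}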

src/compressed_tensors/compressors/quantized_compressors/base.py

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,6 @@
     get_nested_weight_mappings,
     merge_names,
 )
-from compressed_tensors.utils.safetensors_load import match_param_name
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm
@@ -107,7 +106,8 @@ def compress(
                 compressed_dict[name] = value.to(compression_device)
                 continue
 
-            # compress values on meta if loading from meta otherwise on cpu (memory movement too expensive)
+            # compress values on meta if loading from meta otherwise on cpu (memory
+            # movement too expensive)
             module_path = prefix[:-1] if prefix.endswith(".") else prefix
             quant_args = names_to_scheme[module_path].weights
             compressed_values = self.compress_weight(
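A minimal sketch of the device choice described in the rewrapped comment above, assuming the usual convention that "meta" tensors carry no storage; this is an illustration, not the compressor's actual code:

    import torch


    def pick_compression_device(value: torch.Tensor) -> torch.device:
        # compress on meta if the value was loaded on meta (no data to move),
        # otherwise stage on CPU since cross-device movement is too expensive
        return value.device if value.device.type == "meta" else torch.device("cpu")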

src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py

Lines changed: 4 additions & 5 deletions
@@ -15,7 +15,6 @@
 
 from typing import Dict, Optional, Tuple
 
-import numpy
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.compressors.quantized_compressors.base import (
@@ -92,7 +91,6 @@ def compress_weight(
         zero_point: Optional[torch.Tensor] = None,
         g_idx: Optional[torch.Tensor] = None,
     ) -> Dict[str, torch.Tensor]:
-
         quantized_weight = quantize(
             x=weight,
             scale=scale,
@@ -112,7 +110,6 @@ def decompress_weight(
         compressed_data: Dict[str, Tensor],
         quantization_args: Optional[QuantizationArgs] = None,
     ) -> torch.Tensor:
-
         weight = compressed_data["weight_packed"]
         scale = compressed_data["weight_scale"]
         global_scale = compressed_data["weight_global_scale"]
@@ -175,14 +172,16 @@ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
     [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
 )
 
+
 # reference: : https://github.com/vllm-project/vllm/pull/16362
 def unpack_fp4_from_uint8(
     a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
 ) -> torch.Tensor:
     """
     Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
-    (i.e. first four bits correspond to one fp4 value, last four corresond to a consecutive
-    fp4 value). The bits represent an index, which are mapped to an fp4 value.
+    (i.e. first four bits correspond to one fp4 value, last four correspond to a
+    consecutive fp4 value). The bits represent an index, which are mapped to an fp4
+    value.
 
     :param a: tensor to unpack
     :param m: original dim 0 size of the unpacked tensor
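The docstring above describes the packing layout only; as a rough sketch of the nibble decoding it implies (nibble order and sign handling are assumptions here, and this is not the library's implementation), two 4-bit indices per byte select entries from the fp4 lookup table shown in the diff:

    import torch

    FP4_MAGNITUDES = torch.tensor(
        [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
    )


    def unpack_two_fp4_magnitudes(byte: int) -> tuple[float, float]:
        # low nibble first is an assumption; sign-bit handling is omitted for brevity
        low = byte & 0x0F
        high = (byte >> 4) & 0x0F
        return FP4_MAGNITUDES[low & 0x7].item(), FP4_MAGNITUDES[high & 0x7].item()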

src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py

Lines changed: 4 additions & 3 deletions
@@ -14,7 +14,6 @@
 import math
 from typing import Dict, Literal, Optional, Tuple, Union
 
-import numpy as np
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.compressors.quantized_compressors.base import (
@@ -135,7 +134,8 @@ def compress_weight(
         compressed_dict["weight_shape"] = weight_shape
         compressed_dict["weight_packed"] = packed_weight
 
-        # We typically don't compress zp; apart from when using the packed_compressor and when storing group/channel zp
+        # We typically don't compress zp; apart from when using the packed_compressor
+        # and when storing group/channel zp
         if not quantization_args.symmetric and quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,
@@ -166,7 +166,8 @@ def decompress_weight(
         num_bits = quantization_args.num_bits
         unpacked = unpack_from_int32(weight, num_bits, original_shape)
 
-        # NOTE: this will fail decompression as we don't currently handle packed zp on decompression
+        # NOTE: this will fail decompression as we don't currently handle packed zp on
+        # decompression
         if not quantization_args.symmetric and quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,
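For intuition about the unpack_from_int32 call above, here is a hedged sketch of the packing direction (pack_to_int32_sketch is a hypothetical helper, not the library's function, and the exact bit layout used by the compressor may differ):

    import torch


    def pack_to_int32_sketch(values: torch.Tensor, num_bits: int) -> torch.Tensor:
        # values: 1-D tensor of non-negative ints, each smaller than 2 ** num_bits
        per_word = 32 // num_bits
        pad = (-values.numel()) % per_word
        padded = torch.nn.functional.pad(values, (0, pad)).to(torch.int64)
        packed = torch.zeros(padded.numel() // per_word, dtype=torch.int64)
        for i in range(per_word):
            # place the i-th value of each group into bits [i*num_bits, (i+1)*num_bits)
            packed |= padded[i::per_word] << (i * num_bits)
        return packed  # kept as int64 here to sidestep int32 overflow in the sketch

With num_bits=4, eight values share one 32-bit word; unpack_from_int32 reverses this kind of layout using the stored num_bits and original shape.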

src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass
-from typing import Dict, Generator, List, Tuple, Union
+from typing import Dict, List, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor

src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ class Marlin24Compressor(BaseCompressor):
 
     @staticmethod
     def validate_quant_compatability(
-        names_to_scheme: Dict[str, QuantizationScheme]
+        names_to_scheme: Dict[str, QuantizationScheme],
     ) -> bool:
         """
         Checks if every quantized module in the model is compatible with Marlin24

src/compressed_tensors/quantization/lifecycle/apply.py

Lines changed: 6 additions & 5 deletions
@@ -71,14 +71,14 @@ def load_pretrained_quantization_parameters(
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
     a model that has already been initialized with a quantization config.
 
-    NOTE: Will always load inputs/output parameters.
-    Will conditioanlly load weight parameters, if load_weight_quantization is set to True.
+    NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
+    parameters, if load_weight_quantization is set to True.
 
     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
         model, which is used to load quantization parameters
-    :param load_weight_quantization: whether or not the weight quantization parameters shoud
-        be laoded
+    :param load_weight_quantization: whether or not the weight quantization parameters
+        should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
     mapping = get_quantization_parameter_to_path_mapping(model_path)
@@ -261,7 +261,8 @@ def find_name_or_class_matches(
     """
     if check_contains:
         raise NotImplementedError(
-            "This function is deprecated, and the check_contains=True option has been removed."
+            "This function is deprecated, and the check_contains=True option has been"
+            " removed."
         )
 
     return match_targets(name, module, targets)
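A hedged usage sketch of the helper whose docstring was rewrapped above; the call signature follows the parameters documented in the diff, the import path is assumed from the grouped import shown in model_compressor.py, and the model and path are placeholders:

    # import path assumed; the function lives in quantization/lifecycle/apply.py
    from compressed_tensors.quantization import load_pretrained_quantization_parameters

    # model: a torch.nn.Module already initialized with a quantization config
    # "/path/to/compressed-model" is a hypothetical local folder or Hugging Face stub
    # load_pretrained_quantization_parameters(
    #     model,
    #     "/path/to/compressed-model",
    #     load_weight_quantization=True,  # also load weight scales / zero points
    # )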
