vllm-project · NJX-njx · Mar 4, 2026 · Copilot · Mar 4, 2026 · Copilot
@@ -8,7 +8,7 @@
 """
 
 from dataclasses import dataclass, field
-from typing import Callable
+from collections.abc import Callable
 
 from datasets import Dataset, DatasetDict
 from torch.utils.data import DataLoader

diff --git a/src/llmcompressor/core/events/event.py b/src/llmcompressor/core/events/event.py
@@ -84,9 +84,9 @@ class Event:
     :type global_batch: int
     """
 
-    type_: Optional[EventType] = None
-    steps_per_epoch: Optional[int] = None
-    batches_per_step: Optional[int] = None
+    type_: EventType | None = None
+    steps_per_epoch: int | None = None
+    batches_per_step: int | None = None
     invocations_per_step: int = 1
     global_step: int = 0
     global_batch: int = 0
@@ -206,7 +206,7 @@ def current_index(self, value: float):
             )
 
     def should_update(
-        self, start: Optional[float], end: Optional[float], update: Optional[float]
+        self, start: float | None, end: float | None, update: float | None
     ) -> bool:
         """
         Determines if the event should trigger an update.

diff --git a/src/llmcompressor/core/session.py b/src/llmcompressor/core/session.py
@@ -7,7 +7,9 @@
 """
 
 from dataclasses import dataclass
-from typing import Any, Callable
+from typing import Any
+
+from collections.abc import Callable
 
-from typing import Any
-
-from collections.abc import Callable
+from collections.abc import Callable
+from typing import Any
-from typing import Any
-
-from collections.abc import Callable
+from collections.abc import Callable
+from typing import Any
 from loguru import logger
 

diff --git a/src/llmcompressor/core/session_functions.py b/src/llmcompressor/core/session_functions.py
@@ -7,7 +7,9 @@
 
 import threading
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, Generator, Optional
+from typing import TYPE_CHECKING, Any, Optional
+
+from collections.abc import Generator
 
 from loguru import logger
 
@@ -91,7 +93,7 @@ def event(cls, event_type: EventType, **kwargs) -> ModifiedState:
         return active_session().event(event_type, **kwargs)
 
     @classmethod
-    def batch_start(cls, batch_data: Optional[Any] = None, **kwargs) -> ModifiedState:
+    def batch_start(cls, batch_data: Any | None = None, **kwargs) -> ModifiedState:
         """
         Invoke a batch start event for the active session
 
@@ -102,7 +104,7 @@ def batch_start(cls, batch_data: Optional[Any] = None, **kwargs) -> ModifiedStat
         return cls.event(EventType.BATCH_START, batch_data=batch_data, **kwargs)
 
     @classmethod
-    def loss_calculated(cls, loss: Optional[Any] = None, **kwargs) -> ModifiedState:
+    def loss_calculated(cls, loss: Any | None = None, **kwargs) -> ModifiedState:
         """
         Invoke a loss calculated event for the active session
 

@@ -10,7 +10,9 @@
 import math
 import re
 from collections.abc import Iterator, Sized
-from typing import Any, Callable, Optional
+from typing import Any, Optional
+
+from collections.abc import Callable
 
 import torch
 from datasets import Dataset
@@ -334,7 +336,7 @@ class LengthAwareSampler(Sampler[int]):
     def __init__(
         self,
         data_source: Dataset,
-        num_samples: Optional[int] = None,
+        num_samples: int | None = None,
         batch_size: int = 1,
     ) -> None:
         self.data_source = data_source

@@ -2,7 +2,9 @@
 import shutil
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
-from typing import Iterable, Optional
+from typing import Optional
-from typing import Optional
-from typing import Optional
+
+from collections.abc import Iterable
 
 import torch
 import tqdm
@@ -40,7 +42,7 @@ def model_free_ptq(
     scheme: QuantizationScheme | str,
     ignore: Iterable[str] = tuple(),
     max_workers: int = 1,
-    device: Optional[torch.device | str] = None,
+    device: torch.device | str | None = None,
 ):
     """
     Quantize a model without the need for a model definition. This function operates on

@@ -1,7 +1,9 @@
 import os
 import re
 from collections import defaultdict
-from typing import Mapping, TypeVar
+from typing import TypeVar
+
+from collections.abc import Mapping
 
-from typing import TypeVar
-
-from collections.abc import Mapping
+from collections.abc import Mapping
+from typing import TypeVar
-from typing import TypeVar
-
-from collections.abc import Mapping
+from collections.abc import Mapping
+from typing import TypeVar
 import torch
 from compressed_tensors.utils.match import _match_name
@@ -95,11 +97,11 @@ def natural_key(s: str) -> list[str | int]:
                     )
 
         # once we have a full set, yield and reset
-        if all((matches[target] is not None for target in targets)):
+        if all(matches[target] is not None for target in targets):
             matched_sets.append(matches)
             matches = dict.fromkeys(targets, None)
 
-    unmatched_set = matches if any((v is not None for v in matches.values())) else None
+    unmatched_set = matches if any(v is not None for v in matches.values()) else None
 
     if return_unmatched:
         return matched_sets, unmatched_set

@@ -1,7 +1,7 @@
 import os
 from collections import defaultdict
 from collections.abc import Iterator, Mapping
-from typing import Iterable
+from collections.abc import Iterable
 
 import torch
 from compressed_tensors.quantization import QuantizationScheme

@@ -77,7 +77,7 @@ def reindex_fused_weights(
             shutil.copyfile(resolved_path, save_path)
 
     # read index file
-    with open(index_file, "r") as file:
+    with open(index_file) as file:
         index_file_data = json.load(file)
 
     weight_map: dict[str, str] = index_file_data["weight_map"]

@@ -50,7 +50,7 @@ def update_config(
     # write results to config.json file
     config_file_path = find_config_path(save_directory)
     if config_file_path is not None:
-        with open(config_file_path, "r") as file:
+        with open(config_file_path) as file:
             config_data = json.load(file)
 
         config_data[QUANTIZATION_CONFIG_NAME] = qconfig_data

@@ -57,7 +57,7 @@ def validate_safetensors_index(model_files: dict[str, str], scheme: Quantization
         return
 
     if is_microscale_scheme(scheme):
-        with open(index_file_path, "r") as file:
+        with open(index_file_path) as file:
             weight_map: dict[str, str] = json.load(file)["weight_map"]
 
         file_map = invert_mapping(weight_map)

@@ -12,7 +12,9 @@
 import os
 from datetime import datetime
 from pathlib import Path
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING
+
+from collections.abc import Callable
 
-from typing import TYPE_CHECKING
-
-from collections.abc import Callable
+from collections.abc import Callable
+from typing import TYPE_CHECKING
-from typing import TYPE_CHECKING
-
-from collections.abc import Callable
+from collections.abc import Callable
+from typing import TYPE_CHECKING
 from loguru import logger
 from torch.utils.data import DataLoader

diff --git a/src/llmcompressor/logger.py b/src/llmcompressor/logger.py
@@ -54,13 +54,13 @@
 class LoggerConfig:
     disabled: bool = False
     clear_loggers: bool = True
-    console_log_level: Optional[str] = "INFO"
-    log_file: Optional[str] = None
-    log_file_level: Optional[str] = None
+    console_log_level: str | None = "INFO"
+    log_file: str | None = None
+    log_file_level: str | None = None
     metrics_disabled: bool = False
 
 
-def configure_logger(config: Optional[LoggerConfig] = None) -> None:
+def configure_logger(config: LoggerConfig | None = None) -> None:
     """
     Configure the logger for LLM Compressor.
 
@@ -122,7 +122,7 @@ def configure_logger(config: Optional[LoggerConfig] = None) -> None:
     logger.level("METRIC", no=38, color="<yellow>", icon="📈")
 
 
-def support_log_once(record: Dict[str, Any]) -> bool:
+def support_log_once(record: dict[str, Any]) -> bool:
     """
     Support logging only once using `.bind(log_once=True)`
 

@@ -1,4 +1,4 @@
-from typing import Iterable
+from collections.abc import Iterable
 
 import torch
 from compressed_tensors import (

@@ -109,10 +109,10 @@ def copy_from_fused_weights(
     def forward(
         self,
         hidden_states: torch.Tensor,  # [B, T, H]
-        router_indices: Optional[
+        router_indices: None | (
             torch.Tensor
-        ] = None,  # [B, T, top_k] or [tokens, top_k]
-        routing_weights: Optional[torch.Tensor] = None,  # [B, T, E] or [tokens, E]
+        ) = None,  # [B, T, top_k] or [tokens, top_k]
+        routing_weights: torch.Tensor | None = None,  # [B, T, E] or [tokens, E]
     ) -> torch.Tensor:
         """
         Implements the MoE computation using the router outputs.
@@ -192,11 +192,11 @@ def set_module_by_path(root: nn.Module, dotpath: str, new_module: nn.Module) ->
     setattr(parent, parts[-1], new_module)
 
 
-def find_experts(model: nn.Module) -> List[ExpertMeta]:
+def find_experts(model: nn.Module) -> list[ExpertMeta]:
     """
     Locate GPT-OSS MoE expert modules under model.model.layers[*].mlp.experts.
     """
-    metas: List[ExpertMeta] = []
+    metas: list[ExpertMeta] = []
     for li, layer in enumerate(model.model.layers):
         experts = layer.mlp.experts
         device = next(experts.parameters(), torch.zeros(())).device

@@ -46,7 +46,7 @@ def __init__(
         self.shared_expert = original.shared_expert
         self.calibrate_all_experts = calibrate_all_experts
 
-    def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    def forward(self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
         hidden_states = hidden_states.reshape(-1, self.hidden_dim)
         router_scores, router_logits = self.router(hidden_states)
         out = self.shared_expert(hidden_states)

@@ -1,4 +1,3 @@
-# coding=utf-8
 # Copyright 2025 The Qwen team, Alibaba Group and the HuggingFace Inc. team.
 # All rights reserved.
 #

diff --git a/src/llmcompressor/modifiers/autoround/base.py b/src/llmcompressor/modifiers/autoround/base.py
@@ -147,17 +147,17 @@ class AutoRoundModifier(Modifier, QuantizationMixin):
         Defaults to None.
     """
 
-    sequential_targets: Union[str, List[str], None] = None
+    sequential_targets: str | list[str] | None = None
     # AutoRound modifier arguments
     iters: int = 200
     enable_torch_compile: bool = True
     batch_size: int = 8
-    lr: Optional[float] = None
-    device_ids: Optional[str] = None
+    lr: float | None = None
+    device_ids: str | None = None
 
     # private variables
-    _all_module_input: Dict[str, List[Tuple]] = PrivateAttr(default_factory=dict)
-    _q_input: Optional[torch.Tensor] = PrivateAttr(default=None)
+    _all_module_input: dict[str, list[tuple]] = PrivateAttr(default_factory=dict)
+    _q_input: torch.Tensor | None = PrivateAttr(default=None)
 
     def on_initialize(self, state: State, **kwargs) -> bool:
         """
@@ -338,7 +338,7 @@ def on_finalize(self, state: State, **kwargs) -> bool:
 
         return True
 
-    def get_unquantized_layer_names(self, wrapped_model: torch.nn.Module) -> List[str]:
+    def get_unquantized_layer_names(self, wrapped_model: torch.nn.Module) -> list[str]:
         unquantized_layers = []
 
         for name, module in wrapped_model.named_modules():

diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py
@@ -1,6 +1,8 @@
 import inspect
 from itertools import product
-from typing import Iterator, Literal
+from typing import Literal
+
+from collections.abc import Iterator
 
-from typing import Literal
-
-from collections.abc import Iterator
+from collections.abc import Iterator
+from typing import Literal
-from typing import Literal
-
-from collections.abc import Iterator
+from collections.abc import Iterator
+from typing import Literal
 import torch
 from compressed_tensors.quantization import (
@@ -335,12 +337,12 @@ def _set_resolved_mappings(self, model: Module) -> None:
         resolved_mappings: list[ResolvedMapping] = []
         module_to_name = get_module_to_name_dict(model)
         # Get names of modules targeted for quantization (excludes ignored)
-        targeted_names = set(
+        targeted_names = {
             name
             for name, _ in match_named_modules(
                 model, self.resolved_targets, self.ignore
             )
-        )
+        }
         for mapping in self.mappings:
             # we deliberately don't use the ignore list when matching mappings,
             # so that we can handle layers that need smoothing but not quantization

diff --git a/src/llmcompressor/modifiers/gptq/base.py b/src/llmcompressor/modifiers/gptq/base.py
@@ -118,17 +118,17 @@ class GPTQModifier(Modifier, QuantizationMixin):
     """
 
     # gptq modifier arguments
-    sequential_targets: Union[str, List[str], None] = None
+    sequential_targets: str | list[str] | None = None
     block_size: int = 128
-    dampening_frac: Optional[float] = 0.01
+    dampening_frac: float | None = 0.01
     # TODO: this does not serialize / will be incorrectly written
-    actorder: Optional[Union[ActivationOrdering, Sentinel]] = Sentinel("static")
+    actorder: ActivationOrdering | Sentinel | None = Sentinel("static")
     offload_hessians: bool = False
 
     # private variables
-    _module_names: Dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
-    _hessians: Dict[torch.nn.Module, torch.Tensor] = PrivateAttr(default_factory=dict)
-    _num_samples: Dict[torch.nn.Module, torch.Tensor] = PrivateAttr(
+    _module_names: dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
+    _hessians: dict[torch.nn.Module, torch.Tensor] = PrivateAttr(default_factory=dict)
+    _num_samples: dict[torch.nn.Module, torch.Tensor] = PrivateAttr(
         default_factory=dict
     )
 
@@ -235,7 +235,7 @@ def on_event(self, state: State, event: Event, **kwargs):
     def calibrate_module(
         self,
         module: torch.nn.Module,
-        args: Tuple[torch.Tensor, ...],
+        args: tuple[torch.Tensor, ...],
         _output: torch.Tensor,
     ):
         """

diff --git a/src/llmcompressor/modifiers/pruning/constant/base.py b/src/llmcompressor/modifiers/pruning/constant/base.py
@@ -14,8 +14,8 @@
 
 
 class ConstantPruningModifier(Modifier, LayerParamMasking):
-    targets: Union[str, List[str]]
-    parameterized_layers_: Dict[str, ModelParameterizedLayer] = None
+    targets: str | list[str]
+    parameterized_layers_: dict[str, ModelParameterizedLayer] = None
     _epsilon: float = 10e-9
     _save_masks: bool = False
     _use_hooks: bool = False

diff --git a/src/llmcompressor/modifiers/pruning/helpers.py b/src/llmcompressor/modifiers/pruning/helpers.py
@@ -9,7 +9,9 @@
 import math
 import re
 from dataclasses import dataclass
-from typing import Any, Callable, Dict
+from typing import Any, Dict
+
+from collections.abc import Callable
 
-from typing import Any, Dict
-
-from collections.abc import Callable
+from collections.abc import Callable
+from typing import Any, Dict
-from typing import Any, Dict
-
-from collections.abc import Callable
+from collections.abc import Callable
+from typing import Any, Dict
 from llmcompressor.core import Event, State
 
@@ -34,7 +36,7 @@ class PruningCreateSettings:
     update: float
     init_sparsity: float
     final_sparsity: float
-    args: Dict[str, Any]
+    args: dict[str, Any]
 
 
 SchedulerCalculationType = Callable[[Event, State], float]

diff --git a/src/llmcompressor/modifiers/pruning/magnitude/base.py b/src/llmcompressor/modifiers/pruning/magnitude/base.py
@@ -22,16 +22,16 @@
 
 
 class MagnitudePruningModifier(Modifier, LayerParamMasking):
-    targets: Union[str, List[str]]
+    targets: str | list[str]
     init_sparsity: float
     final_sparsity: float
     update_scheduler: str = "cubic"
-    scheduler_args: Dict[str, Any] = {}
+    scheduler_args: dict[str, Any] = {}
     mask_structure: str = "unstructured"
     leave_enabled: bool = False
     apply_globally: bool = False
 
-    parameterized_layers_: Dict[str, ModelParameterizedLayer] = None
+    parameterized_layers_: dict[str, ModelParameterizedLayer] = None
     _save_masks: bool = False
     _use_hooks: bool = False
     scheduler_function_: SchedulerCalculationType = None