diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b17a4111d..221c75444 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.11.2
+    rev: v0.14.3
     hooks:
       - id: ruff
         args:
diff --git a/README.md b/README.md
index ba1bb56d7..4413ad185 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ The library includes quantization primitives for 8-bit & 4-bit operations, throu
 
 ## System Requirements
 bitsandbytes has the following minimum requirements for all platforms:
 
-* Python 3.9+
+* Python 3.10+
 * [PyTorch](https://pytorch.org/get-started/locally/) 2.3+
   * _Note: While we aim to provide wide backwards compatibility, we recommend using the latest version of PyTorch for the best experience._
diff --git a/benchmarking/matmul_benchmark.py b/benchmarking/matmul_benchmark.py
index 6812a4fdd..d2d099141 100644
--- a/benchmarking/matmul_benchmark.py
+++ b/benchmarking/matmul_benchmark.py
@@ -35,8 +35,8 @@ def test_bench_matmul(batch, seq, model, hidden):
     B = torch.empty(hidden, model, dtype=torch.float16, device="cuda")
     torch.nn.init.xavier_uniform_(B)
 
-    B_fp4, state = F.quantize_fp4(B)
-    B_fp4_c, state_c = F.quantize_fp4(B, compress_statistics=True)
+    _B_fp4, _state = F.quantize_fp4(B)
+    _B_fp4_c, _state_c = F.quantize_fp4(B, compress_statistics=True)
 
     B_nf4, state_nf4 = F.quantize_nf4(B)
     B_nf4_c, state_nf4_c = F.quantize_nf4(B, compress_statistics=True)
@@ -117,8 +117,8 @@ def test_bench_matmul(batch, seq, model, hidden):
         f"B -> CB + threshold: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
     )
 
-    CA, SCA, _ = F.int8_vectorwise_quant(A, threshold=0.0)
-    CB, SCB, _ = F.int8_vectorwise_quant(B)
+    CA, _SCA, _ = F.int8_vectorwise_quant(A, threshold=0.0)
+    CB, _SCB, _ = F.int8_vectorwise_quant(B)
     torch.cuda.synchronize()
     t0 = time.time()
     for i in range(iters):
diff --git a/bitsandbytes/__init__.py b/bitsandbytes/__init__.py
index 59b081a91..0b30a04b7 100644
--- a/bitsandbytes/__init__.py
+++ b/bitsandbytes/__init__.py
@@ -54,10 +54,7 @@ def _import_backends():
     """
     from importlib.metadata import entry_points
 
-    if sys.version_info < (3, 10):
-        extensions = entry_points().get("bitsandbytes.backends", [])
-    else:
-        extensions = entry_points(group="bitsandbytes.backends")
+    extensions = entry_points(group="bitsandbytes.backends")
 
     for ext in extensions:
         try:
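A note on the `_import_backends` simplification just above: Python 3.10 added the `group=` keyword to `importlib.metadata.entry_points()`, while on 3.9 the function returned a dict that had to be probed with `.get()`. With 3.9 support dropped, only one code path remains. A minimal, self-contained sketch of the discovery pattern (illustrative only, not the library's exact code):

```python
from importlib.metadata import entry_points

def discover_backends() -> dict:
    """Collect objects registered under the bitsandbytes.backends group."""
    backends = {}
    # Python 3.10+: entry_points() accepts group= and returns only matching
    # entries, replacing the 3.9-era entry_points().get("bitsandbytes.backends", []).
    for ext in entry_points(group="bitsandbytes.backends"):
        backends[ext.name] = ext.load()  # import and return the registered object
    return backends
```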
diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py
index ece18caa3..5391c8522 100644
--- a/bitsandbytes/autograd/_functions.py
+++ b/bitsandbytes/autograd/_functions.py
@@ -1,6 +1,7 @@
+from collections.abc import Callable
 from dataclasses import dataclass
 from math import prod
-from typing import Callable, Optional
+from typing import Optional
 import warnings
 from warnings import warn
 
@@ -257,7 +258,7 @@ def backward(ctx: torch.autograd.function.FunctionCtx, grad_output: torch.Tensor
             return torch.zeros_like(ctx.A), torch.zeros_like(ctx.B), None, bias_grad, None
 
         req_gradA, req_gradB, _, req_gradBias, _ = ctx.needs_input_grad
-        CAt, subA, A = ctx.tensors
+        CAt, subA, _A = ctx.tensors
         SCAt, idx = ctx.tensor_states
         state: MatmulLtState = ctx.state
         grad_A = grad_B = grad_bias = None
diff --git a/bitsandbytes/backends/utils.py b/bitsandbytes/backends/utils.py
index 34e3d5faa..ec96a440c 100644
--- a/bitsandbytes/backends/utils.py
+++ b/bitsandbytes/backends/utils.py
@@ -4,9 +4,10 @@
 import torch
 
 try:
-    import triton  # noqa: F401
     import triton.language as tl  # noqa: F401
+
+    import triton  # noqa: F401
 
     triton_available = True
 except ImportError:
     triton_available = False
diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index 7cca33dcf..3d11276ad 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -6,7 +6,7 @@
 import ctypes as ct
 import itertools
 from math import prod
-from typing import Any, Optional, Union
+from typing import Any, Optional
 
 import numpy as np
 import torch
@@ -1413,7 +1413,7 @@ def percentile_clipping(grad: Tensor, gnorm_vec: Tensor, step: int, percentile:
         raise ValueError(f"Gradient type {grad.dtype} not supported!")
 
     current_gnorm = torch.sqrt(gnorm_vec[step % 100])
-    vals, idx = torch.sort(gnorm_vec)
+    vals, _ = torch.sort(gnorm_vec)
     clip_value = torch.sqrt(vals[percentile])
 
     gnorm_scale = 1.0
@@ -2059,7 +2059,7 @@ def int8_vectorwise_quant(A: torch.Tensor, threshold=0.0):
 
 
 def spmm_coo(
-    cooA: Union[COOSparseTensor, torch.Tensor],
+    cooA: COOSparseTensor | torch.Tensor,
     B: torch.Tensor,
     out: Optional[torch.Tensor] = None,
 ):
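For context on the `Union[...]` → `X | Y` rewrites here (and in `nn/modules.py` below): PEP 604 union syntax is only valid on types at runtime from Python 3.10 onward, so these annotations had to wait for the 3.9 drop. A quick self-contained illustration (not library code):

```python
from typing import Optional, Union

# On Python 3.10+ the | operator on types builds a runtime union object
# that compares equal to the typing spelling, so signatures like
# `cooA: COOSparseTensor | torch.Tensor` are drop-in replacements.
assert (int | str) == Union[int, str]
assert (int | None) == Optional[int]

def scale(x: int | float, factor: float | None = None) -> float:
    return float(x) * (1.0 if factor is None else factor)
```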
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index f482ae6d3..79f8daf2a 100644
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -310,28 +310,28 @@ def _quantize(self, device):
     def cpu(self):
         return self.to(device="cpu")
 
-    def cuda(self, device: Optional[Union[int, device, str]] = None, non_blocking: bool = False):
+    def cuda(self, device: Optional[int | device | str] = None, non_blocking: bool = False):
         return self.to(device="cuda" if device is None else device, non_blocking=non_blocking)
 
-    def xpu(self, device: Optional[Union[int, device, str]] = None, non_blocking: bool = False):
+    def xpu(self, device: Optional[int | device | str] = None, non_blocking: bool = False):
         return self.to(device="xpu" if device is None else device, non_blocking=non_blocking)
 
     @overload
     def to(
         self: T,
-        device: Optional[Union[int, device]] = ...,
-        dtype: Optional[Union[dtype, str]] = ...,
+        device: Optional[int | device] = ...,
+        dtype: Optional[dtype | str] = ...,
         non_blocking: bool = ...,
     ) -> T: ...
 
     @overload
-    def to(self: T, dtype: Union[dtype, str], non_blocking: bool = ...) -> T: ...
+    def to(self: T, dtype: dtype | str, non_blocking: bool = ...) -> T: ...
 
     @overload
     def to(self: T, tensor: Tensor, non_blocking: bool = ...) -> T: ...
 
     def to(self, *args, **kwargs):
-        device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
+        device, dtype, non_blocking, _ = torch._C._nn._parse_to(*args, **kwargs)
 
         if device is not None and device.type != "meta" and not self.bnb_quantized:
             return self._quantize(device)
@@ -644,10 +644,10 @@ def _quantize(self, device):
     def cpu(self):
         return self.to(device="cpu")
 
-    def cuda(self, device: Optional[Union[int, device, str]] = None, non_blocking: bool = False):
+    def cuda(self, device: Optional[int | device | str] = None, non_blocking: bool = False):
         return self.to(device="cuda" if device is None else device, non_blocking=non_blocking)
 
-    def xpu(self, device: Optional[Union[int, device, str]] = None, non_blocking: bool = False):
+    def xpu(self, device: Optional[int | device | str] = None, non_blocking: bool = False):
         return self.to(device="xpu" if device is None else device, non_blocking=non_blocking)
 
     def __deepcopy__(self, memo):
@@ -665,19 +665,19 @@ def __deepcopy__(self, memo):
     @overload
     def to(
         self: T,
-        device: Optional[Union[int, device]] = ...,
-        dtype: Optional[Union[dtype, str]] = ...,
+        device: Optional[int | device] = ...,
+        dtype: Optional[dtype | str] = ...,
         non_blocking: bool = ...,
     ) -> T: ...
 
     @overload
-    def to(self: T, dtype: Union[dtype, str], non_blocking: bool = ...) -> T: ...
+    def to(self: T, dtype: dtype | str, non_blocking: bool = ...) -> T: ...
 
     @overload
     def to(self: T, tensor: Tensor, non_blocking: bool = ...) -> T: ...
 
     def to(self, *args, **kwargs):
-        device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
+        device, dtype, non_blocking, _ = torch._C._nn._parse_to(*args, **kwargs)
 
         is_quantized = self.data.dtype == torch.int8
 
@@ -1048,7 +1048,7 @@ def to(self, *args, **kwargs):
         # Call the parent to() method to handle standard parameter/buffer movement
         result = super().to(*args, **kwargs)
 
-        device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
+        device, _, _, _ = torch._C._nn._parse_to(*args, **kwargs)
 
         # Handle state tensors if needed.
         if device is not None:
diff --git a/bitsandbytes/optim/optimizer.py b/bitsandbytes/optim/optimizer.py
index ea3ff32c9..5f31a584e 100644
--- a/bitsandbytes/optim/optimizer.py
+++ b/bitsandbytes/optim/optimizer.py
@@ -507,7 +507,7 @@ def update_step(self, group, p, gindex, pindex):
         step = state["step"]
 
         if config["percentile_clipping"] < 100:
-            current_gnorm, clip_value, gnorm_scale = F.percentile_clipping(
+            _current_gnorm, _clip_value, gnorm_scale = F.percentile_clipping(
                 grad,
                 state["gnorm_vec"],
                 step,
@@ -725,7 +725,7 @@ def update_step(self, group, p, gindex, pindex):
         step = state["step"]
 
         if config["percentile_clipping"] < 100:
-            current_gnorm, clip_value, gnorm_scale = F.percentile_clipping(
+            _current_gnorm, _clip_value, gnorm_scale = F.percentile_clipping(
                 grad,
                 state["gnorm_vec"],
                 step,
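The `F.percentile_clipping` call sites above now discard the first two return values. For orientation, here is a rough pure-PyTorch sketch of what the primitive computes, pieced together from the `functional.py` and `tests/test_deprecated.py` hunks in this diff; the real implementation dispatches to a native kernel, and the helper name here is illustrative:

```python
import torch

def percentile_clipping_sketch(grad, gnorm_vec, step, percentile=5):
    """Ring buffer of the last 100 squared grad norms; rescale outlier steps."""
    gnorm_vec[step % 100] = grad.float().norm() ** 2
    current_gnorm = torch.sqrt(gnorm_vec[step % 100])
    vals, _ = torch.sort(gnorm_vec)           # the same pattern cleaned up above
    clip_value = torch.sqrt(vals[percentile])
    gnorm_scale = 1.0
    if current_gnorm > clip_value:
        gnorm_scale = (clip_value / current_gnorm).item()
    # Optimizers that only rescale the gradient need just gnorm_scale, which
    # is why the other two results gain underscore prefixes in optimizer.py.
    return current_gnorm, clip_value, gnorm_scale
```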
diff --git a/bitsandbytes/research/autograd/_functions.py b/bitsandbytes/research/autograd/_functions.py
index 9c7afc354..1ea147a90 100644
--- a/bitsandbytes/research/autograd/_functions.py
+++ b/bitsandbytes/research/autograd/_functions.py
@@ -307,8 +307,8 @@ def backward(ctx, grad_output):
             return torch.zeros_like(ctx.A), torch.zeros_like(ctx.B), None, bias_grad, None
 
         req_gradA, req_gradB, _, req_gradBias, _ = ctx.needs_input_grad
-        CAt, subA, A = ctx.tensors
-        SCAt, idx = ctx.tensor_states
+        _CAt, _subA, A = ctx.tensors
+        _SCAt, _idx = ctx.tensor_states
         state = ctx.state
         grad_A = grad_B = grad_bias = None
 
@@ -320,7 +320,7 @@ def backward(ctx, grad_output):
         if len(grad_output.shape) == 3:
             grad_output = grad_output.reshape(-1, grad_output.shape[-1]).contiguous()
 
-        Cgrad, Cgradt, SCgrad, SCgradt, outlier_cols = F.int8_double_quant(grad_output.to(torch.float16))
+        _Cgrad, _Cgradt, _SCgrad, _SCgradt, _outlier_cols = F.int8_double_quant(grad_output.to(torch.float16))
 
         if req_gradB:
             # print('back A shape', A.shape)
diff --git a/bitsandbytes/utils.py b/bitsandbytes/utils.py
index 1af07710c..98ccd7da6 100644
--- a/bitsandbytes/utils.py
+++ b/bitsandbytes/utils.py
@@ -91,7 +91,7 @@ def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rdm=False)
     zstd = (std - stdm) / stdstd
 
     if topk is not None:
-        val, idx = torch.topk(std.abs(), k=topk, dim=0)
+        _, idx = torch.topk(std.abs(), k=topk, dim=0)
     else:
         idx = torch.where(zstd > zscore)[0]
 
diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx
index 9becd8546..5ca3145d5 100644
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -25,7 +25,7 @@ additional platforms such as AMD ROCm.
 
 These are the minimum requirements for `bitsandbytes` across all platforms. Please be aware that some compute platforms may impose more strict requirements.
 
-* Python >= 3.9
+* Python >= 3.10
 * PyTorch >= 2.3
 
 ## NVIDIA CUDA[[cuda]]
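Most renames in this diff, such as the `_CAt`/`_SCAt` bindings in the `research/autograd/_functions.py` hunk above, follow one convention: ruff exempts a binding from its unused-variable diagnostics when the name matches the dummy-variable pattern, which by default means a leading underscore. A tiny illustration with a hypothetical helper (not library code):

```python
import torch

def median_and_above(x: torch.Tensor) -> torch.Tensor:
    # `_idx` signals "intentionally unused"; with ruff's default
    # dummy-variable-rgx it is not reported, whereas `idx` would be.
    vals, _idx = torch.sort(x)
    return vals[x.numel() // 2 :]
```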
diff --git a/pyproject.toml b/pyproject.toml
index 2e7e1b3d2..748b77d90 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ maintainers = [
     {name="Titus von Köller", email="titus@huggingface.co"},
     {name="Matthew Douglas", email="matthew.douglas@huggingface.co"}
 ]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 readme = "README.md"
 license = "MIT"
 license-files = ["LICENSE"]
@@ -35,11 +35,11 @@ classifiers = [
     "Operating System :: Microsoft :: Windows",
     "Programming Language :: C++",
     "Programming Language :: Python :: Implementation :: CPython",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Topic :: Scientific/Engineering :: Artificial Intelligence"
 ]
 dependencies = [
@@ -60,7 +60,7 @@ docs = ["hf-doc-builder==0.5.0"]
 dev = [
     "bitsandbytes[test]",
     "build>=1.0.0,<2",
-    "ruff==0.11.2",
+    "ruff~=0.14.3",
     "pre-commit>=3.5.0,<4",
     "wheel>=0.42,<1"
 ]
@@ -108,7 +108,7 @@ src = [
     "tests",
     "benchmarking"
 ]
-target-version = "py39"
+target-version = "py310"
 line-length = 119
 
 [tool.ruff.lint]
@@ -125,13 +125,14 @@ select = [
 ignore = [
     "B007", # Loop control variable not used within the loop body (TODO: enable)
     "B028", # Warning without stacklevel (TODO: enable)
+    "B905", # zip without explicit `strict=` kwarg
     "E501", # Suppress line-too-long warnings: trust yapf's judgement on this one.
     "E701", # Multiple statements on one line (TODO: enable)
     "E712", # Allow using if x == False, as it's not always equivalent to if x.
     "E731", # Do not use lambda
-    "RUF012", # Mutable class attribute annotations
-    "RUF034", # Useless if-else (TODO: enable)
-    "ISC001", # single-line-implicit-string-concatenation incompatible with formatter
+    "RUF012", # Mutable class attribute annotations
+    "RUF034", # Useless if-else (TODO: enable)
+    "UP045", # Use `X | None` instead of `Optional[X]`
 ]
 
 [tool.ruff.lint.extend-per-file-ignores]
@@ -145,6 +146,9 @@ ignore = [
     "F841",
     "UP030",
 ]
+"bitsandbytes/**/triton/**/*.py" = [
+    "I001", # import order
+]
 
 [tool.ruff.lint.isort]
 combine-as-imports = true
diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py
index f469ff351..feb17c966 100644
--- a/tests/test_deprecated.py
+++ b/tests/test_deprecated.py
@@ -52,7 +52,7 @@ def test_percentile_clipping(gtype):
         else:
             gnorm_vec1[step % 100] = gnorm2
 
-        vals, idx = torch.sort(gnorm_vec1)
+        vals, _ = torch.sort(gnorm_vec1)
         clip1 = vals[percentile]
 
         torch.testing.assert_close(gnorm_vec1, torch.sqrt(gnorm_vec2))
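On the new `B905` entry in the pyproject.toml ignore list above: that is flake8-bugbear's "`zip()` without an explicit `strict=`" rule, which only becomes meaningful now that the minimum Python is 3.10, where the keyword first appeared. The project opts out for the moment; this is the failure mode the rule guards against (illustrative values):

```python
a, b = [1, 2, 3], ["x", "y"]

print(list(zip(a, b)))            # [(1, 'x'), (2, 'y')] -- silent truncation
try:
    list(zip(a, b, strict=True))  # Python 3.10+: length mismatch raises
except ValueError as err:
    print(err)                    # zip() argument 2 is shorter than argument 1
```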
diff --git a/tests/test_functional.py b/tests/test_functional.py
index 08de12008..e045be28c 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -312,7 +312,7 @@ def test_fp8_quant(self, device):
     def test_bench_dequantization(self):
         a = torch.rand(1024, 1024, device="cuda").half()
         code = F.create_fp8_map(True, 3, 0, 4).cuda()
-        qa, SA = F.quantize_blockwise(a, code=code)
+        qa, _SA = F.quantize_blockwise(a, code=code)
         print(qa.max())
 
         max_theoretical_mu = 1024 * 1024 * 2 / 1024**3 / 672 * 1000 * 1000
@@ -321,7 +321,7 @@ def test_bench_dequantization(self):
         torch.cuda.synchronize()
         t0 = time.time()
         for i in range(100):
-            qa, SA = F.quantize_blockwise(a)
+            qa, _SA = F.quantize_blockwise(a)
         torch.cuda.synchronize()
         # print((time.time()-t0)/1e6)
 
@@ -1004,7 +1004,7 @@ def test_spmm_coo_dequant(self, dim1, dim2, dtype):
         torch.nn.init.xavier_uniform_(B)
         Bt = B.t().contiguous()
 
-        CB, CBt, statsB, statsBt, coo_tensor = F.int8_double_quant(B)
+        _CB, CBt, _statsB, statsBt, _coo_tensor = F.int8_double_quant(B)
 
         rowidx = torch.randint(0, A.shape[-1], size=(15,))
 
@@ -1023,7 +1023,7 @@ def test_spmm_coo_dequant(self, dim1, dim2, dtype):
         values, counts = torch.unique(cooA.rowidx, return_counts=True)
         offset = counts.cumsum(0).int()
-        max_count, max_idx = torch.sort(counts, descending=True)
+        max_count, _ = torch.sort(counts, descending=True)
         print(torch.median(max_count.float()))
 
         torch.testing.assert_close(out2, out3, rtol=0.05, atol=0.001)
diff --git a/tests/test_optim.py b/tests/test_optim.py
index 3d4157152..190d9a206 100644
--- a/tests/test_optim.py
+++ b/tests/test_optim.py
@@ -496,7 +496,7 @@ def test_adam_percentile_clipping(requires_cuda, dim1, dim2, gtype, optim_bits):
         g2 = g1.clone()
         p2.grad = g2
 
-        current_gnorm, clip_val, gnorm_scale = F.percentile_clipping(g1, gnorm_vec, step, 5)
+        _current_gnorm, _clip_val, gnorm_scale = F.percentile_clipping(g1, gnorm_vec, step, 5)
         g1 = (g1.float() * gnorm_scale).to(gtype)
         p1.grad = g1
 
diff --git a/tests/test_parametrize.py b/tests/test_parametrize.py
index 9e661ee2f..d96df2a8c 100644
--- a/tests/test_parametrize.py
+++ b/tests/test_parametrize.py
@@ -246,14 +246,14 @@ def test_error_conditions():
         replace_parameter_4bit(module, "nonexistent")
 
     # Test TypeError for non-Parameter attribute
-    with pytest.raises(TypeError, match="Parameter 'not_param' is not an instance of nn.Parameter"):
+    with pytest.raises(TypeError, match="Parameter 'not_param' is not an instance of nn\\.Parameter"):
         replace_parameter_4bit(module, "not_param")
 
     # Test same errors for prequantized version
     with pytest.raises(AttributeError, match="Module does not have parameter 'nonexistent'"):
         replace_parameter_4bit_prequantized(module, "nonexistent", {}, torch.device("cpu"))
 
-    with pytest.raises(TypeError, match="Parameter 'not_param' is not an instance of nn.Parameter"):
+    with pytest.raises(TypeError, match="Parameter 'not_param' is not an instance of nn\\.Parameter"):
         replace_parameter_4bit_prequantized(module, "not_param", {}, torch.device("cpu"))
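A closing note on the `test_parametrize.py` hunk: `pytest.raises(match=...)` applies its pattern with `re.search`, so the unescaped dot in `nn.Parameter` would also match unintended messages. A standalone demonstration of why the escape matters:

```python
import re

loose = "Parameter 'not_param' is not an instance of nn.Parameter"
exact = r"Parameter 'not_param' is not an instance of nn\.Parameter"

msg = "Parameter 'not_param' is not an instance of nnXParameter"
assert re.search(loose, msg)          # false positive: '.' matched 'X'
assert re.search(exact, msg) is None  # escaped pattern rejects it
```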