Commit ed4de7c

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 6e43134 commit ed4de7c


3 files changed: 18 additions & 6 deletions


litgpt/attention.py

Lines changed: 8 additions & 2 deletions
@@ -322,7 +322,10 @@ def build_mask_cache(
     """
     # Usual causal mask:
     mask = torch.ones(
-        max_seq_length, max_seq_length, device=device, dtype=dtype,
+        max_seq_length,
+        max_seq_length,
+        device=device,
+        dtype=dtype,
     ).triu(diagonal=1)
     if sliding_window_size is not None:
         mask += torch.ones_like(mask).tril(diagonal=-sliding_window_size)

@@ -367,7 +370,10 @@ def build_mask_slice(
     device = token_positions.device
     tp_dtype = token_positions.dtype
     bool_mask = torch.arange(
-        input_pos, input_pos + num, device=device, dtype=tp_dtype,
+        input_pos,
+        input_pos + num,
+        device=device,
+        dtype=tp_dtype,
     ).view(1, 1, -1, 1) < token_positions.unsqueeze(2)
     if sliding_window_size is not None:
         extra_mask = torch.arange(
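For context on the call that pre-commit reformats above, here is a minimal, self-contained sketch of the mask construction these lines perform. It is based only on what is visible in this hunk, so the wrapper function name and the default dtype are assumptions.

import torch


def causal_mask_sketch(max_seq_length, sliding_window_size=None, device=None, dtype=None):
    # Hypothetical wrapper around the logic shown in build_mask_cache.
    # Nonzero entries mark positions a query must NOT attend to:
    # everything strictly above the diagonal lies in the future.
    mask = torch.ones(
        max_seq_length,
        max_seq_length,
        device=device,
        dtype=dtype,
    ).triu(diagonal=1)
    if sliding_window_size is not None:
        # With a sliding window, also mask keys that lie
        # sliding_window_size or more steps behind the query.
        mask += torch.ones_like(mask).tril(diagonal=-sliding_window_size)
    return mask


# Example: 5 positions, window of 2; row i leaves only keys i-1 and i unmasked.
print(causal_mask_sketch(5, sliding_window_size=2))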

litgpt/config.py

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@
 
 from litgpt.utils import find_multiple
 
+
 # See `Config.start_of_layer_hook`. A start of layer hook is called just before
 # a layer is computed. The call is `hook(x, block_idx, input_pos)`, where
 # `x` is the layer input, `block_idx` the number of the layer, and `input_pos`
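The comment touched by this hunk documents the start-of-layer hook interface. As a hedged illustration only, here is a callable matching the documented signature hook(x, block_idx, input_pos); how such a hook is attached to Config is not shown in this diff, so the wiring mentioned in the trailing comment is an assumption.

import torch


def log_block_input(x: torch.Tensor, block_idx: int, input_pos) -> None:
    # Hypothetical start-of-layer hook following the docstring above:
    # `x` is the layer input, `block_idx` the number of the layer,
    # `input_pos` the position information passed to the model.
    print(f"block {block_idx}: input norm {x.norm().item():.4f}, input_pos={input_pos}")


# Assumed wiring (not shown in this hunk): pass the callable via the
# `start_of_layer_hook` field that the comment in litgpt/config.py refers to.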

tests/test_model.py

Lines changed: 9 additions & 4 deletions
@@ -37,9 +37,9 @@
 import litgpt.config as config_module
 from litgpt import GPT, Config
 from litgpt.attention import (
+    DefaultKeysAndValues,
     build_mask_cache,
     build_mask_slice,
-    DefaultKeysAndValues,
     scaled_dot_product_attention,
 )
 from litgpt.model import CausalSelfAttention

@@ -1540,13 +1540,18 @@ def test_build_mask_slice(
     for bs in range(batch_size):
         for nq in range(n_query_groups):
             token_positions[bs, nq, :] = torch.randperm(
-                seq_len, device=device,
+                seq_len,
+                device=device,
             )[:cache_length]
     mask = build_mask_slice(
-        input_pos, num, token_positions, dtype, sliding_window_size,
+        input_pos,
+        num,
+        token_positions,
+        dtype,
+        sliding_window_size,
     )
     mask_cmp = batched_index_select(
-        full_mask[input_pos: (input_pos + num), :],
+        full_mask[input_pos : (input_pos + num), :],
         dim=1,
         idx=token_positions,
     )
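For readers skimming this hunk: the test draws random cached token positions per (batch, query group), asks build_mask_slice for the mask rows covering input_pos .. input_pos + num, and compares against selecting the same rows of the full mask and picking out the cached key positions. A rough reference computation with plain torch.gather is sketched below; shapes are paraphrased and litgpt's batched_index_select helper is not reproduced.

import torch


def reference_mask_slice(full_mask, input_pos, num, token_positions):
    # full_mask: (seq_len, seq_len) mask for the whole sequence.
    # token_positions: (batch, n_query_groups, cache_length) key positions in the KV cache.
    rows = full_mask[input_pos : input_pos + num, :]  # (num, seq_len)
    batch, n_groups, cache_length = token_positions.shape
    expanded = rows.expand(batch, n_groups, num, rows.shape[-1])
    # Gather the cached key positions along the key (last) dimension.
    idx = token_positions.unsqueeze(2).expand(batch, n_groups, num, cache_length)
    return torch.gather(expanded, dim=-1, index=idx)  # (batch, n_groups, num, cache_length)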
