 
 class OutputLMHead(torch.nn.Module):
     """Standalone output_lm_head block extracted from PagedLlmModelV1"""
-
+
     def __init__(self, theta: Theta, config: LlamaModelConfig):
         super().__init__()
         self.config = config
         self.hp = config.hp
-
+
         # Output normalization layer
         self.output_norm = RMSNormLayer(
-            theta("output_norm"),
-            epsilon=self.hp.attention_layer_norm_rms_epsilon
+            theta("output_norm"), epsilon=self.hp.attention_layer_norm_rms_epsilon
         )
-
+
         # Output linear layer (language model head)
         self.output_lm_head = LinearLayer(
             theta("output"),
             matmul_kernel=config.matmul_kernel,
         )
-
+
     def forward(self, h: torch.Tensor) -> torch.Tensor:
         # Apply normalization
-        h_norm = self.output_norm(h) # output fp16 and weights float32
-
+        h_norm = self.output_norm(h)  # output fp16 and weights float32
+
         # Apply final linear transformation
-        logits = self.output_lm_head(h_norm) # output and weights fp16
-
+        logits = self.output_lm_head(h_norm)  # output and weights fp16
+
         return logits
 
 
-def create_output_lm_head_from_irpa(irpa_path: str) -> tuple[OutputLMHead, torch.Tensor]:
+def create_output_lm_head_from_irpa(
+    irpa_path: str,
+) -> tuple[OutputLMHead, torch.Tensor]:
     """
     Create OutputLMHead module from IRPA file and generate sample input.
-
+
     Args:
         irpa_path: Path to the IRPA file
-
+
     Returns:
         Tuple of (OutputLMHead module, sample input tensor)
     """
     # Load dataset from IRPA file
     dataset = Dataset.load(Path(irpa_path))
-
+
     # Create model config from dataset
     llama_config = LlamaModelConfig.from_dataset(
         dataset=dataset,
         attention_kernel="torch",
         matmul_kernel="sharktank.asm;*",
         activation_dtype=torch.float16,
     )
-
+
     # Create the output LM head module
     output_lm_head = OutputLMHead(dataset.root_theta, llama_config)
-
+
     # Generate sample input tensor matching expected dimensions
     # Typical shape: [batch_size, seq_len, hidden_dim]
     # TODO: Check if there are other more suitable sizes to test.
     batch_size = 2
     seq_len = 8
-    hidden_dim = llama_config.hp.embedding_length  # Use embedding_length instead of model_dim
-
+    hidden_dim = (
+        llama_config.hp.embedding_length
+    )  # Use embedding_length instead of model_dim
+
     sample_input = torch.randn(
-        batch_size, seq_len, hidden_dim,
-        dtype=llama_config.activation_dtype
+        batch_size, seq_len, hidden_dim, dtype=llama_config.activation_dtype
     )
-
+
     return output_lm_head, sample_input
 
 
 # Test cases
-@pytest.mark.parametrize("dtype,atol", [
-    (torch.float16, 1e-4)
-])
+@pytest.mark.parametrize("dtype,atol", [(torch.float16, 1e-4)])
 def test_output_lm_head_iree_vs_eager(request, dtype, atol):
     """
     Test OutputLMHead module comparing IREE vs PyTorch eager execution.
-
+
     Use the --parameters command-line argument to specify the IRPA file path.
     """
     # Validate and get IRPA path
     irpa_path = validate_and_get_irpa_path(request)
-
+
     try:
         # Create module and sample input from IRPA
-        module, sample_input = create_output_lm_head_from_irpa(irpa_path)
+        module, sample_input = create_output_lm_head_from_irpa(irpa_path)
     except Exception as e:
         pytest.skip(f"Failed to load model from IRPA: {e}")
 
     # Convert to desired dtype
     # module = module.to(dtype)
     sample_input = sample_input.to(dtype)
-
+
     # Run IREE vs torch comparison
-    run_iree_vs_torch_fx(module, input_args=(sample_input,), atol=atol, rtol=0,
-                         compile_flags=LLM_HIP_COMPILE_FLAGS,
-                         parameters_path=irpa_path)
+    run_iree_vs_torch_fx(
+        module,
+        input_args=(sample_input,),
+        atol=atol,
+        rtol=0,
+        compile_flags=LLM_HIP_COMPILE_FLAGS,
+        parameters_path=irpa_path,
+    )
 
 
 def test_output_lm_head_mock():
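The IREE-vs-eager test above relies on a --parameters pytest option and a validate_and_get_irpa_path helper that are defined outside this diff. A minimal sketch of what that plumbing is assumed to look like; the names, messages, and conftest.py placement are illustrative, not the repository's actual implementation:

    # conftest.py -- assumed sketch, not the actual sharktank test plumbing
    import pytest
    from pathlib import Path


    def pytest_addoption(parser):
        # Registers the --parameters option referenced in the test docstring.
        parser.addoption(
            "--parameters",
            action="store",
            default=None,
            help="Path to the IRPA parameter file used by IRPA-backed tests",
        )


    def validate_and_get_irpa_path(request):
        # Resolves the IRPA path from the command line and skips if it is unusable.
        irpa_path = request.config.getoption("--parameters")
        if irpa_path is None:
            pytest.skip("No --parameters IRPA path provided")
        if not Path(irpa_path).exists():
            pytest.skip(f"IRPA file not found: {irpa_path}")
        return irpa_path
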
@@ -118,10 +123,10 @@ def test_output_lm_head_mock():
     This test is added so it runs without requiring an IRPA file.
     """
     torch.manual_seed(42)
-
+
     # Mock configuration - provide all required parameters
     from sharktank.layers.configs import LlamaHParams
-
+
     # Create LlamaHParams with all required parameters
     hp = LlamaHParams(
         model_arch="llama",
@@ -135,41 +140,48 @@ def test_output_lm_head_mock():
         attention_head_count_kv=8,
         vocab_size=32000,
     )
-
+
     # Create mock config
     config = LlamaModelConfig(
         hp=hp,
         activation_dtype=torch.float16,
         # attention_dtype=torch.float32,
     )
-
+
     # Create mock theta with synthetic weights
     from sharktank.types import DefaultPrimitiveTensor
-
+
     # Mock output_norm weights
     output_norm_weight = torch.randn(hp.embedding_length, dtype=torch.float32)
-
-    # Mock output (lm_head) weights
+
+    # Mock output (lm_head) weights
     output_weight = torch.randn(hp.vocab_size, hp.embedding_length, dtype=torch.float16)
-
+
     # Create theta structure
     theta_dict = {
         "output_norm": {"weight": DefaultPrimitiveTensor(data=output_norm_weight)},
         "output": {"weight": DefaultPrimitiveTensor(data=output_weight)},
     }
-
+
     theta = Theta(theta_dict)
-
+
     # Create module
     module = OutputLMHead(theta, config)
-
+
     # Create sample input
     batch_size, seq_len = 2, 8
-    sample_input = torch.randn(batch_size, seq_len, hp.embedding_length, dtype=torch.float32)
-
+    sample_input = torch.randn(
+        batch_size, seq_len, hp.embedding_length, dtype=torch.float32
+    )
+
     # Run IREE vs torch comparison
-    run_iree_vs_torch_fx(module, input_args=(sample_input,), atol=1e-4, rtol=0,
-                         compile_flags=LLM_HIP_COMPILE_FLAGS,)
+    run_iree_vs_torch_fx(
+        module,
+        input_args=(sample_input,),
+        atol=1e-4,
+        rtol=0,
+        compile_flags=LLM_HIP_COMPILE_FLAGS,
+    )
 
 
 if __name__ == "__main__":
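
For completeness, a minimal eager-mode usage sketch of the extracted block, assuming a valid IRPA file is available (the path below is a placeholder; the names come from the helpers defined in the diff above):

    # Assumed usage sketch; OutputLMHead and create_output_lm_head_from_irpa are defined above.
    module, sample_input = create_output_lm_head_from_irpa("/path/to/model.irpa")
    logits = module(sample_input)  # expected shape: [batch_size, seq_len, vocab_size]

Under pytest, the IRPA path is instead supplied through the --parameters option described in the test docstring (for example, pytest <path to this test file> --parameters /path/to/model.irpa), while test_output_lm_head_mock builds synthetic weights and needs no IRPA file.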