examples/models/llama/source_transformation
1 file changed: +4, -4 lines

@@ -9,7 +9,7 @@
 # Example script for exporting Llama2 to flatbuffer
 
 import math
-from typing import Tuple, Union, Optional
+from typing import Optional, Tuple, Union
 
 import torch
 
@@ -24,7 +24,7 @@ def __init__(
         self,
         kv_cache: Optional[Union[KVCache, QuantizedKVCache]] = None,
         dim: int = -1,
-        is_causal = True,
+        is_causal=True,
     ):
         super().__init__()
         # Custom op only supports float32 currently. Converting to/from float32 is
@@ -48,8 +48,8 @@ def forward(
         k: torch.Tensor,
         v: torch.Tensor,
         bsz,
-        seqlen = None,
-        mask = None,
+        seqlen=None,
+        mask=None,
     ):
         # Custom op only supports float32 currently. Converting to/from float32 is
         # faster than not having the op.
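
The comment repeated in both methods explains the purpose of this wrapper module: the custom SDPA op currently only accepts float32 inputs, so activations are converted to float32 around the op call and the result is converted back, which is still faster than skipping the custom op. Below is a minimal sketch of that round-trip pattern, assuming a placeholder op handle; the names _sdpa_with_float32_roundtrip and custom_sdpa_op are illustrative, not the actual identifiers used in the file.

import torch

# Illustrative sketch of the float32 round-trip described in the diff's comments.
# custom_sdpa_op stands in for the custom SDPA kernel, which (per the comment)
# currently only supports float32 inputs.
def _sdpa_with_float32_roundtrip(custom_sdpa_op, q, k, v, *args, **kwargs):
    input_dtype = q.dtype
    # Convert inputs to float32 before calling the op.
    q32, k32, v32 = q.float(), k.float(), v.float()
    out = custom_sdpa_op(q32, k32, v32, *args, **kwargs)
    # Convert the result back to the original dtype (e.g. float16/bfloat16).
    return out.to(input_dtype)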