Update generate.py

mikekgfb · web-flow · commit ed8ab550df7c · 2025-01-24T15:30:26.000-08:00
Allow the SDPA MATH backend as a fallback alongside the configured attention backend
diff --git a/torchchat/generate.py b/torchchat/generate.py
@@ -1172,7 +1172,8 @@ def callback(x, *, done_generating=False):
                 prof = torch.profiler.profile()
             t0 = time.perf_counter()
             num_tokens_generated = 0
-            with torch.nn.attention.sdpa_kernel([self.builder_args.attention_backend]), prof:
+            # Always enable SDPBackend.MATH alongside the configured backend so SDPA has a fallback.
+            with torch.nn.attention.sdpa_kernel([self.builder_args.attention_backend, torch.nn.attention.SDPBackend.MATH]), prof:
                 generator_func = self.generate(
                     self.model,
                     encoded,