Commit 5efd3af

[https://nvbugs/5740075][fix] Fix sm120 speculation
Signed-off-by: Mike Iovine <miovine@nvidia.com>
1 parent bd13957

File tree

1 file changed: +2 -1 lines

tensorrt_llm/_torch/speculative/interface.py

Lines changed: 2 additions & 1 deletion
@@ -136,8 +136,9 @@ def extend_ctx(self, attention_backend: Type[AttentionBackend]):
             # 1-model has separate logic for handling draft tokens
             return False
 
+        xqa_supported = get_sm_version() in (90, 100)
         return not issubclass(attention_backend,
-                              TrtllmAttention) or get_sm_version() < 90
+                              TrtllmAttention) or not xqa_supported
 
     def attention_need_spec_dec_mode(
             self,
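
For context, a minimal sketch of the check after this change (hypothetical stand-ins, not the actual TensorRT-LLM source; get_sm_version, AttentionBackend, and TrtllmAttention are simplified versions of the names that appear in the diff): the XQA-based speculation path is only taken for the TRT-LLM attention backend on SM 90 or SM 100, so SM 120 now falls back to extending the context, whereas the previous get_sm_version() < 90 check let SM 120 through.

# Minimal, self-contained sketch (stand-ins only, not the real TensorRT-LLM
# classes) showing the gating logic after this commit.
from typing import Type


def get_sm_version() -> int:
    # Stand-in for the real helper; pretend we are running on an SM 120 GPU.
    return 120


class AttentionBackend:
    """Simplified stand-in for the attention backend base class."""


class TrtllmAttention(AttentionBackend):
    """Simplified stand-in for the TRT-LLM attention backend."""


def extend_ctx(attention_backend: Type[AttentionBackend]) -> bool:
    # Old check: `get_sm_version() < 90` assumed every SM >= 90 supports the
    # XQA speculation path, which does not hold for SM 120.
    # New check: XQA speculation is only taken on SM 90 and SM 100.
    xqa_supported = get_sm_version() in (90, 100)
    return not issubclass(attention_backend, TrtllmAttention) or not xqa_supported


print(extend_ctx(TrtllmAttention))  # True on SM 120: fall back to extending ctx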
