Commit 5aa3586

use get_compressed_attention_impl
Signed-off-by: Kyle Sayers <[email protected]>
1 parent 0e4e002 commit 5aa3586

File tree

1 file changed (+1, -1)
  • src/llmcompressor/modifiers/quantization/quantization


src/llmcompressor/modifiers/quantization/quantization/mixin.py

Lines changed: 1 addition & 1 deletion

@@ -243,7 +243,7 @@ def _initialize_hooks(self, model: torch.nn.Module) -> Set[RemovableHandle]:
         hooks = set()

         # TODO: attnq
-        # attention_impl = enable_compressed_attention(model)
+        # attention_impl = get_compressed_attention_impl()
         # hooks |= register_calibrate_attn_hooks(self, attention_impl)

         for module in model.modules():
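The hunk above touches only a commented-out line, but the surrounding code shows the pattern `_initialize_hooks` follows: each hook registration returns a removable handle, and the handles are accumulated in a `set` so every calibration hook can be torn down later. The sketch below illustrates that pattern with plain-Python stand-ins; the `RemovableHandle` and `Module` classes here are simplified assumptions, not llmcompressor's or PyTorch's actual implementations (the real code works with `torch.nn.Module` and `torch.utils.hooks.RemovableHandle`).

```python
class RemovableHandle:
    """Minimal stand-in for torch.utils.hooks.RemovableHandle."""

    def __init__(self, registry: dict, key: int):
        self._registry = registry
        self._key = key

    def remove(self) -> None:
        # Deregister the hook this handle refers to, if still present.
        self._registry.pop(self._key, None)


class Module:
    """Minimal stand-in for a hook-capable torch.nn.Module."""

    def __init__(self):
        self._hooks: dict = {}
        self._next_key = 0

    def register_forward_hook(self, fn) -> RemovableHandle:
        key = self._next_key
        self._next_key += 1
        self._hooks[key] = fn
        return RemovableHandle(self._hooks, key)


def initialize_hooks(modules) -> set:
    """Mirror the diff's `hooks = set()` accumulation: register one
    hook per module and collect the returned handles."""
    hooks = set()
    for module in modules:
        hooks.add(module.register_forward_hook(lambda m, i, o: o))
    return hooks


modules = [Module(), Module()]
handles = initialize_hooks(modules)
assert len(handles) == 2

# Tearing down: removing every collected handle deregisters all hooks.
for handle in handles:
    handle.remove()
assert all(not m._hooks for m in modules)
```

Returning the handle set (rather than storing hooks globally) is what lets a mixin like this add calibration hooks during quantization and cleanly remove them afterward.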
