Commit 98772b9

default attn_implementation to eager to avoid issues
Signed-off-by: Zhiyu Cheng <[email protected]>
1 parent: 8d0e40f

File tree

1 file changed (+2, −2 lines)

examples/llm_ptq/hf_ptq.py

Lines changed: 2 additions & 2 deletions
@@ -758,10 +758,10 @@ def output_decode(generated_ids, input_shape):
     parser.add_argument(
         "--attn_implementation",
         help=(
-            "Specify the attention implementation to use."
+            "Specify the attention implementation to use. "
             "This arg will be passed to the HF model loading if specified."
         ),
-        default=None,
+        default="eager",
         type=str,
     )
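For reference, a minimal sketch of how such a flag is typically forwarded to Hugging Face model loading. The argument definition mirrors the diff above; the model name and the kwargs plumbing are illustrative assumptions, not the exact code in hf_ptq.py:

import argparse

from transformers import AutoModelForCausalLM

parser = argparse.ArgumentParser()
parser.add_argument(
    "--attn_implementation",
    help=(
        "Specify the attention implementation to use. "
        "This arg will be passed to the HF model loading if specified."
    ),
    default="eager",  # matches the new default in this commit
    type=str,
)
args = parser.parse_args()

# Only pass the kwarg through when it is set, so HF can otherwise pick its own default.
model_kwargs = {}
if args.attn_implementation is not None:
    model_kwargs["attn_implementation"] = args.attn_implementation

# Illustrative checkpoint name; hf_ptq.py takes the model path from its own CLI args.
model = AutoModelForCausalLM.from_pretrained("gpt2", **model_kwargs)

Defaulting to "eager" presumably sidesteps environments where optional attention backends (e.g. flash-attention) are not installed, per the commit message's "avoid issues".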

0 commit comments
