default attn_implementation to eager to avoid issues

Edwardf0t1 · Edwardf0t1 · commit 3a43b1e67856 · 2025-10-23T00:43:19.000Z
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -748,10 +748,10 @@ def output_decode(generated_ids, input_shape):
     parser.add_argument(
         "--attn_implementation",
         help=(
-            "Specify the attention implementation to use."
+            "Specify the attention implementation to use. "
             "This arg will be passed to the HF model loading if specified."
         ),
-        default=None,
+        default="eager",
         type=str,
     )